From cc547400d6d2b2b96c02552a85fb8b02edf4a834 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 19 Feb 2024 17:06:02 -0300 Subject: [PATCH 01/29] Add sub-allocated descriptor set header --- examples_tests | 2 +- .../nbl/core/alloc/address_allocator_traits.h | 22 ++++- .../video/alloc/SubAllocatedDescriptorSet.h | 99 +++++++++++++++++++ 3 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 include/nbl/video/alloc/SubAllocatedDescriptorSet.h diff --git a/examples_tests b/examples_tests index d9f006128b..f18077bc18 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit d9f006128b7ccb8e6878a74e9ce1d12143370e26 +Subproject commit f18077bc181132afe687b3d3fec3783c3c272a4b diff --git a/include/nbl/core/alloc/address_allocator_traits.h b/include/nbl/core/alloc/address_allocator_traits.h index 293dc3503e..1b228154f9 100644 --- a/include/nbl/core/alloc/address_allocator_traits.h +++ b/include/nbl/core/alloc/address_allocator_traits.h @@ -53,6 +53,18 @@ namespace nbl::core } } + static inline void multi_alloc_addr(AddressAlloc& alloc, uint32_t count, size_type* outAddresses, const size_type* bytes, + const size_type alignment, const size_type* hint=nullptr) noexcept + { + for (uint32_t i=0; i::value>::multi_alloc_addr( + alloc,std::min(count-i,maxMultiOps),outAddresses+i,bytes+i,alignment,hint ? (hint+i):nullptr); + } + static inline void multi_free_addr(AddressAlloc& alloc, uint32_t count, const size_type* addr, const size_type* bytes) noexcept { for (uint32_t i=0; i + +namespace nbl::video +{ + +class SubAllocatedDescriptorSet : public core::IReferenceCounted +{ +public: + // address allocator gives offsets + // reserved allocator allocates memory to keep the address allocator state inside + using AddressAllocator = core::GeneralpurposeAddressAllocator; + using ReservedAllocator = core::allocator; + using size_type = typename AddressAllocator::size_type; + using value_type = typename AddressAllocator::size_type; + static constexpr value_type invalid_value = AddressAllocator::invalid_address; + +protected: + struct SubAllocDescriptorSetRange { + video::IGPUDescriptorSetLayout::SBinding binding; + std::shared_ptr addressAllocator; + std::shared_ptr reservedAllocator; + size_t reservedSize; + }; + std::vector m_allocatableRanges = {}; + + +public: + // constructors + template + inline SubAllocatedDescriptorSet(const std::span bindings, + const value_type maxAllocatableAlignment, Args&&... args) + { + m_allocatableRanges.reserve(bindings.size()); + + for (auto& binding : bindings) + { + SubAllocDescriptorSetRange range; + range.binding = binding; + range.reservedSize = 0; + // Only bindings with these flags will be allocatable + if (binding.createFlags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) + { + range.reservedSize = AddressAllocator::reserved_size(maxAllocatableAlignment, static_cast(binding.count), args...); + range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); + range.addressAllocator = std::shared_ptr(new AddressAllocator( + range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), + static_cast(0), 0u, maxAllocatableAlignment, static_cast(binding.count), std::forward(args)... + )); + } + m_allocatableRanges.push_back(range); + } + + } + + ~SubAllocatedDescriptorSet() + { + for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) + { + auto& range = m_allocatableRanges[i]; + if (range.reservedSize == 0) + continue; + + auto ptr = reinterpret_cast(core::address_allocator_traits::getReservedSpacePtr(*range.addressAllocator)); + range.reservedAllocator->deallocate(const_cast(ptr), range.reservedSize); + } + } + + // main methods + + //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` + template + inline void multi_allocate(uint32_t binding, uint32_t count, value_type* outAddresses, const size_type* sizes, const Args&... args) + { + auto& range = m_allocatableRanges[binding]; + assert(range.reservedSize); // Check if this binding has an allocator + core::address_allocator_traits::multi_alloc_addr(*range.addressAllocator, count, outAddresses, sizes, 1, args...); + } + inline void multi_deallocate(uint32_t binding, uint32_t count, const size_type* addr, const size_type* sizes) + { + auto& range = m_allocatableRanges[binding]; + assert(range.reservedSize); // Check if this binding has an allocator + core::address_allocator_traits::multi_free_addr(*range.addressAllocator, count, addr, sizes); + } +}; + +} + +#endif From d10059ff2cd8d87a4eea46d3ebfd6a8ba45b4c57 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 19 Feb 2024 17:18:01 -0300 Subject: [PATCH 02/29] Add some reusable binding API --- .../video/alloc/SubAllocatedDescriptorSet.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 04d08d84dc..346e58fd28 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -76,21 +76,31 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted } } + // amount of bindings in the descriptor set layout used + uint32_t getLayoutBindingCount() { return m_allocatableRanges.size(); } + + // whether that binding index can be sub-allocated + bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges[binding].reservedSize != 0; } + + AddressAllocator& getBindingAllocator(uint32_t binding) + { + assert(isBindingAllocatable(binding)); // Check if this binding has an allocator + return *m_allocatableRanges[binding].addressAllocator; + } + // main methods //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` template inline void multi_allocate(uint32_t binding, uint32_t count, value_type* outAddresses, const size_type* sizes, const Args&... args) { - auto& range = m_allocatableRanges[binding]; - assert(range.reservedSize); // Check if this binding has an allocator - core::address_allocator_traits::multi_alloc_addr(*range.addressAllocator, count, outAddresses, sizes, 1, args...); + core::address_allocator_traits::multi_alloc_addr(getBindingAllocator(binding), count, outAddresses, sizes, 1, args...); } inline void multi_deallocate(uint32_t binding, uint32_t count, const size_type* addr, const size_type* sizes) { auto& range = m_allocatableRanges[binding]; assert(range.reservedSize); // Check if this binding has an allocator - core::address_allocator_traits::multi_free_addr(*range.addressAllocator, count, addr, sizes); + core::address_allocator_traits::multi_free_addr(getBindingAllocator(binding), count, addr, sizes); } }; From 347ff632dbde06277b2bac49dca4cafd15549e97 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 20 Feb 2024 15:35:25 -0300 Subject: [PATCH 03/29] Work on using descriptor set layout directly --- include/nbl/asset/IDescriptorSetLayout.h | 6 ++ .../video/alloc/SubAllocatedDescriptorSet.h | 72 +++++++++++++------ 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/include/nbl/asset/IDescriptorSetLayout.h b/include/nbl/asset/IDescriptorSetLayout.h index 6feea80c74..9317379b44 100644 --- a/include/nbl/asset/IDescriptorSetLayout.h +++ b/include/nbl/asset/IDescriptorSetLayout.h @@ -118,6 +118,12 @@ class IDescriptorSetLayout : public virtual core::IReferenceCounted // TODO: tr return m_stageFlags[index.data]; } + inline core::bitflag getCreateFlags(const storage_range_index_t index) const + { + assert(index.data < m_count); + return m_createFlags[index.data]; + } + inline uint32_t getCount(const storage_range_index_t index) const { assert(index.data < m_count); diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 346e58fd28..f585e71d8b 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -25,7 +25,6 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted protected: struct SubAllocDescriptorSetRange { - video::IGPUDescriptorSetLayout::SBinding binding; std::shared_ptr addressAllocator; std::shared_ptr reservedAllocator; size_t reservedSize; @@ -36,31 +35,64 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted public: // constructors template - inline SubAllocatedDescriptorSet(const std::span bindings, - const value_type maxAllocatableAlignment, Args&&... args) + inline SubAllocatedDescriptorSet(video::IGPUDescriptorSetLayout* layout, const value_type maxAllocatableAlignment, Args&&... args) { - m_allocatableRanges.reserve(bindings.size()); - - for (auto& binding : bindings) + for (uint32_t descriptorType = 0; descriptorType < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++) { - SubAllocDescriptorSetRange range; - range.binding = binding; - range.reservedSize = 0; - // Only bindings with these flags will be allocatable - if (binding.createFlags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) - | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT - | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) + auto descType = static_cast(descriptorType); + auto& redirect = layout->getDescriptorRedirect(descType); + + for (uint32_t i = 0; i < redirect.getBindingCount(); i++) { - range.reservedSize = AddressAllocator::reserved_size(maxAllocatableAlignment, static_cast(binding.count), args...); - range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); - range.addressAllocator = std::shared_ptr(new AddressAllocator( - range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), - static_cast(0), 0u, maxAllocatableAlignment, static_cast(binding.count), std::forward(args)... - )); + auto binding = redirect.getBinding(i); + auto storageIndex = redirect.findBindingStorageIndex(binding); + + auto count = redirect.getCount(storageIndex); + auto flags = redirect.getCreateFlags(storageIndex); + + for (uint32_t j = m_allocatableRanges.size(); j < binding.data; j++) + { + m_allocatableRanges.push_back({}); + } + + SubAllocDescriptorSetRange range; + range.reservedSize = 0; + // Only bindings with these flags will be allocatable + if (flags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) + { + range.reservedSize = AddressAllocator::reserved_size(maxAllocatableAlignment, static_cast(count), args...); + range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); + range.addressAllocator = std::shared_ptr(new AddressAllocator( + range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), + static_cast(0), 0u, maxAllocatableAlignment, static_cast(count), std::forward(args)... + )); + } + m_allocatableRanges.insert(m_allocatableRanges.begin() + binding.data, range); } - m_allocatableRanges.push_back(range); } + // for (auto& binding : bindings) + // { + // SubAllocDescriptorSetRange range; + // range.binding = binding; + // range.reservedSize = 0; + // // Only bindings with these flags will be allocatable + // if (binding.createFlags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) + // | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT + // | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) + // { + // range.reservedSize = AddressAllocator::reserved_size(maxAllocatableAlignment, static_cast(binding.count), args...); + // range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); + // range.addressAllocator = std::shared_ptr(new AddressAllocator( + // range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), + // static_cast(0), 0u, maxAllocatableAlignment, static_cast(binding.count), std::forward(args)... + // )); + // } + // m_allocatableRanges.push_back(range); + // } + } ~SubAllocatedDescriptorSet() From e1282c714aa6c206d43e7a875de9c1e148168e74 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 20 Feb 2024 15:35:40 -0300 Subject: [PATCH 04/29] Remove out old bindings --- .../video/alloc/SubAllocatedDescriptorSet.h | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index f585e71d8b..0d67b79150 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -72,27 +72,6 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted m_allocatableRanges.insert(m_allocatableRanges.begin() + binding.data, range); } } - - // for (auto& binding : bindings) - // { - // SubAllocDescriptorSetRange range; - // range.binding = binding; - // range.reservedSize = 0; - // // Only bindings with these flags will be allocatable - // if (binding.createFlags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) - // | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT - // | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) - // { - // range.reservedSize = AddressAllocator::reserved_size(maxAllocatableAlignment, static_cast(binding.count), args...); - // range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); - // range.addressAllocator = std::shared_ptr(new AddressAllocator( - // range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), - // static_cast(0), 0u, maxAllocatableAlignment, static_cast(binding.count), std::forward(args)... - // )); - // } - // m_allocatableRanges.push_back(range); - // } - } ~SubAllocatedDescriptorSet() From 28611be7a01eedd7b2d831df17cf80d91062d43a Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 21 Feb 2024 08:54:45 -0300 Subject: [PATCH 05/29] Use pool address allocator --- .../video/alloc/SubAllocatedDescriptorSet.h | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 0d67b79150..47d5b9b88e 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -17,7 +17,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted public: // address allocator gives offsets // reserved allocator allocates memory to keep the address allocator state inside - using AddressAllocator = core::GeneralpurposeAddressAllocator; + using AddressAllocator = core::PoolAddressAllocator; using ReservedAllocator = core::allocator; using size_type = typename AddressAllocator::size_type; using value_type = typename AddressAllocator::size_type; @@ -102,16 +102,27 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // main methods //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` - template - inline void multi_allocate(uint32_t binding, uint32_t count, value_type* outAddresses, const size_type* sizes, const Args&... args) + inline void multi_allocate(uint32_t binding, uint32_t count, value_type* outAddresses) { - core::address_allocator_traits::multi_alloc_addr(getBindingAllocator(binding), count, outAddresses, sizes, 1, args...); + auto& allocator = getBindingAllocator(binding); + for (uint32_t i=0; i::multi_free_addr(getBindingAllocator(binding), count, addr, sizes); + auto& allocator = getBindingAllocator(binding); + for (uint32_t i=0; i Date: Wed, 21 Feb 2024 09:08:04 -0300 Subject: [PATCH 06/29] Use map --- .../video/alloc/SubAllocatedDescriptorSet.h | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 47d5b9b88e..38732aef39 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -29,7 +29,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted std::shared_ptr reservedAllocator; size_t reservedSize; }; - std::vector m_allocatableRanges = {}; + std::unordered_map m_allocatableRanges = {}; public: @@ -50,26 +50,20 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted auto count = redirect.getCount(storageIndex); auto flags = redirect.getCreateFlags(storageIndex); - for (uint32_t j = m_allocatableRanges.size(); j < binding.data; j++) - { - m_allocatableRanges.push_back({}); - } - - SubAllocDescriptorSetRange range; - range.reservedSize = 0; // Only bindings with these flags will be allocatable if (flags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) { + SubAllocDescriptorSetRange range; range.reservedSize = AddressAllocator::reserved_size(maxAllocatableAlignment, static_cast(count), args...); range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); range.addressAllocator = std::shared_ptr(new AddressAllocator( range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), static_cast(0), 0u, maxAllocatableAlignment, static_cast(count), std::forward(args)... )); + m_allocatableRanges.emplace(binding.data, range); } - m_allocatableRanges.insert(m_allocatableRanges.begin() + binding.data, range); } } } @@ -91,12 +85,16 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted uint32_t getLayoutBindingCount() { return m_allocatableRanges.size(); } // whether that binding index can be sub-allocated - bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges[binding].reservedSize != 0; } + bool isBindingAllocatable(uint32_t binding) + { + return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); + } AddressAllocator& getBindingAllocator(uint32_t binding) { - assert(isBindingAllocatable(binding)); // Check if this binding has an allocator - return *m_allocatableRanges[binding].addressAllocator; + auto range = m_allocatableRanges.find(binding); + assert(range != m_allocatableRanges.end());// Check if this binding has an allocator + return *range->second.addressAllocator; } // main methods From 190067a8e681ec62d352bbde33b3e0ae58820e2d Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 21 Feb 2024 13:50:44 -0300 Subject: [PATCH 07/29] PR reviews --- .../video/alloc/SubAllocatedDescriptorSet.h | 47 ++++++++++++------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 38732aef39..fdb8107a9d 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -29,13 +29,19 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted std::shared_ptr reservedAllocator; size_t reservedSize; }; - std::unordered_map m_allocatableRanges = {}; + std::map m_allocatableRanges = {}; + #ifdef _NBL_DEBUG + std::recursive_mutex stAccessVerfier; + #endif // _NBL_DEBUG + + constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head + constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u; public: // constructors template - inline SubAllocatedDescriptorSet(video::IGPUDescriptorSetLayout* layout, const value_type maxAllocatableAlignment, Args&&... args) + inline SubAllocatedDescriptorSet(video::IGPUDescriptorSetLayout* layout) { for (uint32_t descriptorType = 0; descriptorType < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++) { @@ -56,11 +62,12 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) { SubAllocDescriptorSetRange range; - range.reservedSize = AddressAllocator::reserved_size(maxAllocatableAlignment, static_cast(count), args...); + range.reservedSize = AddressAllocator::reserved_size(MaxDescriptorSetAllocationAlignment, static_cast(count), MinDescriptorSetAllocationSize); range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); range.addressAllocator = std::shared_ptr(new AddressAllocator( range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), - static_cast(0), 0u, maxAllocatableAlignment, static_cast(count), std::forward(args)... + static_cast(0), 0u, MaxDescriptorSetAllocationAlignment, static_cast(count), + MinDescriptorSetAllocationSize )); m_allocatableRanges.emplace(binding.data, range); } @@ -75,26 +82,20 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted auto& range = m_allocatableRanges[i]; if (range.reservedSize == 0) continue; - auto ptr = reinterpret_cast(core::address_allocator_traits::getReservedSpacePtr(*range.addressAllocator)); + range.addressAllocator->~PoolAddressAllocator(); range.reservedAllocator->deallocate(const_cast(ptr), range.reservedSize); } } - // amount of bindings in the descriptor set layout used - uint32_t getLayoutBindingCount() { return m_allocatableRanges.size(); } - // whether that binding index can be sub-allocated - bool isBindingAllocatable(uint32_t binding) - { - return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); - } + bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); } - AddressAllocator& getBindingAllocator(uint32_t binding) + AddressAllocator* getBindingAllocator(uint32_t binding) { auto range = m_allocatableRanges.find(binding); assert(range != m_allocatableRanges.end());// Check if this binding has an allocator - return *range->second.addressAllocator; + return range->second.addressAllocator.get(); } // main methods @@ -102,24 +103,34 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` inline void multi_allocate(uint32_t binding, uint32_t count, value_type* outAddresses) { - auto& allocator = getBindingAllocator(binding); + #ifdef _NBL_DEBUG + std::unique_lock tLock(stAccessVerfier,std::try_to_lock_t()); + assert(tLock.owns_lock()); + #endif // _NBL_DEBUG + + auto allocator = getBindingAllocator(binding); for (uint32_t i=0; ialloc_addr(1,1); } } inline void multi_deallocate(uint32_t binding, uint32_t count, const size_type* addr) { - auto& allocator = getBindingAllocator(binding); + #ifdef _NBL_DEBUG + std::unique_lock tLock(stAccessVerfier,std::try_to_lock_t()); + assert(tLock.owns_lock()); + #endif // _NBL_DEBUG + + auto allocator = getBindingAllocator(binding); for (uint32_t i=0; ifree_addr(addr[i],1); } } }; From 289e424c175d6cc599f643473df5c3b37b69b77a Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 21 Feb 2024 16:31:34 -0300 Subject: [PATCH 08/29] PR reviews --- include/nbl/video/alloc/SubAllocatedDescriptorSet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index fdb8107a9d..a5f1d6291c 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -35,7 +35,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted std::recursive_mutex stAccessVerfier; #endif // _NBL_DEBUG - constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head + constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 1u; constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u; public: @@ -83,7 +83,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted if (range.reservedSize == 0) continue; auto ptr = reinterpret_cast(core::address_allocator_traits::getReservedSpacePtr(*range.addressAllocator)); - range.addressAllocator->~PoolAddressAllocator(); + range.addressAllocator = nullptr; range.reservedAllocator->deallocate(const_cast(ptr), range.reservedSize); } } From e0e91ffa89471e27724940a9154bbee198a39026 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Sun, 25 Feb 2024 23:33:39 -0300 Subject: [PATCH 09/29] Work on deferred freeing the descriptors --- .../video/alloc/SubAllocatedDescriptorSet.h | 86 ++++++++++++++++--- 1 file changed, 75 insertions(+), 11 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index a5f1d6291c..0a503c79c8 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -23,12 +23,49 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted using value_type = typename AddressAllocator::size_type; static constexpr value_type invalid_value = AddressAllocator::invalid_address; + class DeferredFreeFunctor + { + public: + inline DeferredFreeFunctor(SubAllocatedDescriptorSet* composed, uint32_t binding, size_type count, value_type* addresses) + : m_addresses(addresses, addresses + count), m_binding(binding), m_composed(composed) + { + } + + // Just does the de-allocation + inline void operator()() + { + // isn't assert already debug-only? + #ifdef _NBL_DEBUG + assert(m_composed); + #endif // _NBL_DEBUG + m_composed->multi_deallocate(m_binding, m_addresses.size(), &m_addresses[0]); + } + + // Takes count of allocations we want to free up as reference, true is returned if + // the amount of allocations freed was >= allocationsToFreeUp + // False is returned if there are more allocations to free up + inline bool operator()(size_type allocationsToFreeUp) + { + auto prevCount = m_addresses.size(); + operator()(); + auto totalFreed = m_addresses.size() - prevCount; + + // This does the same logic as bool operator()(size_type&) on + // CAsyncSingleBufferSubAllocator + return totalFreed >= allocationsToFreeUp; + } + protected: + SubAllocatedDescriptorSet* m_composed; + uint32_t m_binding; + std::vector m_addresses; + }; protected: struct SubAllocDescriptorSetRange { std::shared_ptr addressAllocator; std::shared_ptr reservedAllocator; size_t reservedSize; }; + MultiTimelineEventHandlerST eventHandler; std::map m_allocatableRanges = {}; #ifdef _NBL_DEBUG @@ -39,6 +76,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u; public: + // constructors template inline SubAllocatedDescriptorSet(video::IGPUDescriptorSetLayout* layout) @@ -100,16 +138,24 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // main methods - //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` - inline void multi_allocate(uint32_t binding, uint32_t count, value_type* outAddresses) +#ifdef _NBL_DEBUG + std::unique_lock stAccessVerifyDebugGuard() { - #ifdef _NBL_DEBUG std::unique_lock tLock(stAccessVerfier,std::try_to_lock_t()); assert(tLock.owns_lock()); - #endif // _NBL_DEBUG + return tLock; + } +#else + bool stAccessVerifyDebugGuard() { return false; } +#endif + + //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` + inline void multi_allocate(uint32_t binding, size_type count, value_type* outAddresses) + { + auto debugGuard = stAccessVerifyDebugGuard(); auto allocator = getBindingAllocator(binding); - for (uint32_t i=0; ialloc_addr(1,1); } } - inline void multi_deallocate(uint32_t binding, uint32_t count, const size_type* addr) + inline void multi_deallocate(uint32_t binding, size_type count, const size_type* addr) { - #ifdef _NBL_DEBUG - std::unique_lock tLock(stAccessVerfier,std::try_to_lock_t()); - assert(tLock.owns_lock()); - #endif // _NBL_DEBUG + auto debugGuard = stAccessVerifyDebugGuard(); auto allocator = getBindingAllocator(binding); - for (uint32_t i=0; ifree_addr(addr[i],1); } } + //! + inline void multi_deallocate(const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept + { + auto debugGuard = stAccessVerifyDebugGuard(); + eventHandler.latch(futureWait,std::move(functor)); + } + // TODO: improve signature of this function in the future + template + inline void multi_deallocate(uint32_t binding, uint32_t count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept + { + if (futureWait.semaphore) + multi_deallocate(futureWait, DeferredFreeFunctor(&m_composed, binding, count, addr)); + else + multi_deallocate(binding, count, addr); + } + //! Returns free events still outstanding + inline uint32_t cull_frees() noexcept + { + auto debugGuard = stAccessVerifyDebugGuard(); + return eventHandler.poll().eventsLeft; + } }; } From 68582ea79c4e538e01b2e16e9ff58eef56f4ac2f Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 26 Feb 2024 00:54:37 -0300 Subject: [PATCH 10/29] Work on having descriptor set match with its sub allocator --- include/nbl/video/alloc/SubAllocatedDescriptorSet.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 0a503c79c8..a616b92f48 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -67,6 +67,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted }; MultiTimelineEventHandlerST eventHandler; std::map m_allocatableRanges = {}; + core::smart_refctd_ptr m_descriptorSet; #ifdef _NBL_DEBUG std::recursive_mutex stAccessVerfier; @@ -79,8 +80,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // constructors template - inline SubAllocatedDescriptorSet(video::IGPUDescriptorSetLayout* layout) + inline SubAllocatedDescriptorSet(video::IGPUDescriptorSet* descriptorSet) { + auto layout = descriptorSet->getLayout(); for (uint32_t descriptorType = 0; descriptorType < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++) { auto descType = static_cast(descriptorType); @@ -111,6 +113,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted } } } + m_descriptorSet = core::smart_refctd_ptr(descriptorSet); } ~SubAllocatedDescriptorSet() @@ -161,6 +164,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted continue; outAddresses[i] = allocator->alloc_addr(1,1); + // TODO: should also write something to the descriptor set (or probably leave that to the caller?) } } inline void multi_deallocate(uint32_t binding, size_type count, const size_type* addr) @@ -174,6 +178,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted continue; allocator->free_addr(addr[i],1); + // TODO: should also write something to the descriptor sets } } //! @@ -187,7 +192,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted inline void multi_deallocate(uint32_t binding, uint32_t count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept { if (futureWait.semaphore) - multi_deallocate(futureWait, DeferredFreeFunctor(&m_composed, binding, count, addr)); + multi_deallocate(futureWait, DeferredFreeFunctor(&this, binding, count, addr)); else multi_deallocate(binding, count, addr); } From d71684818f4a1dd29edba0b8066dc32f2034dcb6 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 26 Feb 2024 23:15:24 -0300 Subject: [PATCH 11/29] PR reviews --- .../video/alloc/SubAllocatedDescriptorSet.h | 67 +++++++++++++------ 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index a616b92f48..c7ad2229c8 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -26,7 +26,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted class DeferredFreeFunctor { public: - inline DeferredFreeFunctor(SubAllocatedDescriptorSet* composed, uint32_t binding, size_type count, value_type* addresses) + inline DeferredFreeFunctor(SubAllocatedDescriptorSet* composed, uint32_t binding, size_type count, const value_type* addresses) : m_addresses(addresses, addresses + count), m_binding(binding), m_composed(composed) { } @@ -38,13 +38,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted #ifdef _NBL_DEBUG assert(m_composed); #endif // _NBL_DEBUG - m_composed->multi_deallocate(m_binding, m_addresses.size(), &m_addresses[0]); + m_composed->multi_deallocate(m_binding, m_addresses.size(), m_addresses.data()); } // Takes count of allocations we want to free up as reference, true is returned if // the amount of allocations freed was >= allocationsToFreeUp // False is returned if there are more allocations to free up - inline bool operator()(size_type allocationsToFreeUp) + inline bool operator()(size_type& allocationsToFreeUp) { auto prevCount = m_addresses.size(); operator()(); @@ -52,7 +52,11 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // This does the same logic as bool operator()(size_type&) on // CAsyncSingleBufferSubAllocator - return totalFreed >= allocationsToFreeUp; + bool freedEverything = totalFreed >= allocationsToFreeUp; + + if (freedEverything) allocationsToFreeUp = 0u; + else allocationsToFreeUp -= totalFreed; + return freedEverything; } protected: SubAllocatedDescriptorSet* m_composed; @@ -61,11 +65,19 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted }; protected: struct SubAllocDescriptorSetRange { - std::shared_ptr addressAllocator; - std::shared_ptr reservedAllocator; + MultiTimelineEventHandlerST eventHandler; + std::unique_ptr addressAllocator; + std::unique_ptr reservedAllocator; size_t reservedSize; + + SubAllocDescriptorSetRange( + std::unique_ptr&& inAddressAllocator, + std::unique_ptr&& inReservedAllocator, + size_t inReservedSize) : + eventHandler({}), addressAllocator(std::move(inAddressAllocator)), + reservedAllocator(std::move(inReservedAllocator)), reservedSize(inReservedSize) {} + }; - MultiTimelineEventHandlerST eventHandler; std::map m_allocatableRanges = {}; core::smart_refctd_ptr m_descriptorSet; @@ -79,8 +91,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted public: // constructors - template - inline SubAllocatedDescriptorSet(video::IGPUDescriptorSet* descriptorSet) + inline SubAllocatedDescriptorSet(core::smart_refctd_ptr&& descriptorSet) { auto layout = descriptorSet->getLayout(); for (uint32_t descriptorType = 0; descriptorType < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++) @@ -101,19 +112,18 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT)) { - SubAllocDescriptorSetRange range; - range.reservedSize = AddressAllocator::reserved_size(MaxDescriptorSetAllocationAlignment, static_cast(count), MinDescriptorSetAllocationSize); - range.reservedAllocator = std::shared_ptr(new ReservedAllocator()); - range.addressAllocator = std::shared_ptr(new AddressAllocator( - range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT), + auto reservedSize = AddressAllocator::reserved_size(MaxDescriptorSetAllocationAlignment, static_cast(count), MinDescriptorSetAllocationSize); + auto reservedAllocator = std::unique_ptr(new ReservedAllocator()); + auto addressAllocator = std::unique_ptr(new AddressAllocator( + reservedAllocator->allocate(reservedSize, _NBL_SIMD_ALIGNMENT), static_cast(0), 0u, MaxDescriptorSetAllocationAlignment, static_cast(count), MinDescriptorSetAllocationSize )); - m_allocatableRanges.emplace(binding.data, range); + m_allocatableRanges.emplace(binding.data, SubAllocDescriptorSetRange(std::move(addressAllocator), std::move(reservedAllocator), reservedSize)); } } } - m_descriptorSet = core::smart_refctd_ptr(descriptorSet); + m_descriptorSet = std::move(descriptorSet); } ~SubAllocatedDescriptorSet() @@ -135,7 +145,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted AddressAllocator* getBindingAllocator(uint32_t binding) { auto range = m_allocatableRanges.find(binding); - assert(range != m_allocatableRanges.end());// Check if this binding has an allocator + // Check if this binding has an allocator + if (range == m_allocatableRanges.end()) + return nullptr; return range->second.addressAllocator.get(); } @@ -182,17 +194,22 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted } } //! - inline void multi_deallocate(const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept + inline void multi_deallocate(uint32_t binding, const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept { + auto range = m_allocatableRanges.find(binding); + // Check if this binding has an allocator + if (range == m_allocatableRanges.end()) + return; + + auto& eventHandler = range->second.eventHandler; auto debugGuard = stAccessVerifyDebugGuard(); eventHandler.latch(futureWait,std::move(functor)); } // TODO: improve signature of this function in the future - template - inline void multi_deallocate(uint32_t binding, uint32_t count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept + inline void multi_deallocate(uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept { if (futureWait.semaphore) - multi_deallocate(futureWait, DeferredFreeFunctor(&this, binding, count, addr)); + multi_deallocate(binding, futureWait, DeferredFreeFunctor(this, binding, count, addr)); else multi_deallocate(binding, count, addr); } @@ -200,7 +217,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted inline uint32_t cull_frees() noexcept { auto debugGuard = stAccessVerifyDebugGuard(); - return eventHandler.poll().eventsLeft; + uint32_t frees = 0; + for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) + { + auto& it = m_allocatableRanges[i]; + frees += it.eventHandler.poll().eventsLeft; + } + return frees; } }; From 4fd4b8fe558066960fa10cf51ca9c791a05433b6 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 27 Feb 2024 17:40:03 -0300 Subject: [PATCH 12/29] Fix example --- .../video/alloc/SubAllocatedDescriptorSet.h | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index c7ad2229c8..d8a08c7e56 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -8,6 +8,7 @@ #include "nbl/video/alloc/IBufferAllocator.h" #include +#include namespace nbl::video { @@ -65,10 +66,10 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted }; protected: struct SubAllocDescriptorSetRange { - MultiTimelineEventHandlerST eventHandler; - std::unique_ptr addressAllocator; - std::unique_ptr reservedAllocator; - size_t reservedSize; + MultiTimelineEventHandlerST eventHandler = MultiTimelineEventHandlerST({}); + std::unique_ptr addressAllocator = nullptr; + std::unique_ptr reservedAllocator = nullptr; + size_t reservedSize = 0; SubAllocDescriptorSetRange( std::unique_ptr&& inAddressAllocator, @@ -76,7 +77,15 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted size_t inReservedSize) : eventHandler({}), addressAllocator(std::move(inAddressAllocator)), reservedAllocator(std::move(inReservedAllocator)), reservedSize(inReservedSize) {} + SubAllocDescriptorSetRange() {} + SubAllocDescriptorSetRange& operator=(SubAllocDescriptorSetRange&& other) + { + addressAllocator = std::move(other.addressAllocator); + reservedAllocator = std::move(other.reservedAllocator); + reservedSize = other.reservedSize; + return *this; + } }; std::map m_allocatableRanges = {}; core::smart_refctd_ptr m_descriptorSet; @@ -119,7 +128,8 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted static_cast(0), 0u, MaxDescriptorSetAllocationAlignment, static_cast(count), MinDescriptorSetAllocationSize )); - m_allocatableRanges.emplace(binding.data, SubAllocDescriptorSetRange(std::move(addressAllocator), std::move(reservedAllocator), reservedSize)); + + m_allocatableRanges[binding.data] = SubAllocDescriptorSetRange(std::move(addressAllocator), std::move(reservedAllocator), reservedSize); } } } From 58c4e901d1f1cadf10f15d28777a9f5b2fafda95 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 27 Feb 2024 18:31:11 -0300 Subject: [PATCH 13/29] Add writing of descriptors on the allocate method --- .../video/alloc/SubAllocatedDescriptorSet.h | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index d8a08c7e56..263b8107c9 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -89,6 +89,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted }; std::map m_allocatableRanges = {}; core::smart_refctd_ptr m_descriptorSet; + core::smart_refctd_ptr m_logicalDevice; #ifdef _NBL_DEBUG std::recursive_mutex stAccessVerfier; @@ -100,7 +101,8 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted public: // constructors - inline SubAllocatedDescriptorSet(core::smart_refctd_ptr&& descriptorSet) + inline SubAllocatedDescriptorSet(core::smart_refctd_ptr&& descriptorSet, + core::smart_refctd_ptr&& logicalDevice) : m_logicalDevice(std::move(logicalDevice)) { auto layout = descriptorSet->getLayout(); for (uint32_t descriptorType = 0; descriptorType < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++) @@ -174,12 +176,20 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted bool stAccessVerifyDebugGuard() { return false; } #endif + video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); } + //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` - inline void multi_allocate(uint32_t binding, size_type count, value_type* outAddresses) + inline void multi_allocate(uint32_t binding, size_type count, video::IGPUDescriptorSet::SDescriptorInfo* descriptors, value_type* outAddresses) { auto debugGuard = stAccessVerifyDebugGuard(); auto allocator = getBindingAllocator(binding); + + std::vector writes; + std::vector infos; + writes.reserve(count); + infos.reserve(count); + for (size_type i=0; ialloc_addr(1,1); // TODO: should also write something to the descriptor set (or probably leave that to the caller?) + + auto& descriptor = descriptors[i]; + + video::IGPUDescriptorSet::SWriteDescriptorSet write; + { + write.dstSet = m_descriptorSet.get(); + write.binding = binding; + write.arrayElement = outAddresses[i]; + write.count = 1u; + // descriptors could be a const pointer, but the problem is that this pointer in + // SWriteDescriptorSet.info isn't const + // can we change it? + write.info = &descriptor; + } + infos.push_back(descriptor); + writes.push_back(write); } + + m_logicalDevice->updateDescriptorSets(writes, {}); } inline void multi_deallocate(uint32_t binding, size_type count, const size_type* addr) { From 41b9a5b8af45eddc9e79c2fa156e2f5c9c817fa7 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 27 Feb 2024 18:49:43 -0300 Subject: [PATCH 14/29] Work on try allocate and timings --- .../video/alloc/SubAllocatedDescriptorSet.h | 60 +++++++++++++++++-- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 263b8107c9..c15a1a3f05 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -64,9 +64,10 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted uint32_t m_binding; std::vector m_addresses; }; + using EventHandler = MultiTimelineEventHandlerST; protected: struct SubAllocDescriptorSetRange { - MultiTimelineEventHandlerST eventHandler = MultiTimelineEventHandlerST({}); + EventHandler eventHandler = EventHandler({}); std::unique_ptr addressAllocator = nullptr; std::unique_ptr reservedAllocator = nullptr; size_t reservedSize = 0; @@ -179,7 +180,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); } //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` - inline void multi_allocate(uint32_t binding, size_type count, video::IGPUDescriptorSet::SDescriptorInfo* descriptors, value_type* outAddresses) + inline size_type try_multi_allocate(uint32_t binding, size_type count, video::IGPUDescriptorSet::SDescriptorInfo* descriptors, value_type* outAddresses) { auto debugGuard = stAccessVerifyDebugGuard(); @@ -190,13 +191,18 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted writes.reserve(count); infos.reserve(count); + size_type unallocatedSize = 0u; for (size_type i=0; ialloc_addr(1,1); - // TODO: should also write something to the descriptor set (or probably leave that to the caller?) + if (outAddresses[i] == AddressAllocator::invalid_address) + { + unallocatedSize = count - i; + break; + } auto& descriptor = descriptors[i]; @@ -216,12 +222,56 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted } m_logicalDevice->updateDescriptorSets(writes, {}); + return unallocatedSize; + } + + template + inline size_type multi_allocate(const std::chrono::time_point& maxWaitPoint, uint32_t binding, size_type count, video::IGPUDescriptorSet::SDescriptorInfo* descriptors, value_type* outAddresses) noexcept + { + auto debugGuard = stAccessVerifyDebugGuard(); + + auto range = m_allocatableRanges.find(binding); + // Check if this binding has an allocator + if (range == m_allocatableRanges.end()) + return count; + + auto& eventHandler = range->second.eventHandler; + + // try allocate once + size_type unallocatedSize = try_multi_allocate(binding, count, descriptors, outAddresses); + if (!unallocatedSize) + return 0u; + + // then try to wait at least once and allocate + do + { + eventHandler.wait(maxWaitPoint, unallocatedSize); + + unallocatedSize = try_multi_allocate(binding, unallocatedSize, &descriptors[count - unallocatedSize], &outAddresses[count - unallocatedSize]); + if (!unallocatedSize) + return 0u; + } while(Clock::now() Date: Wed, 28 Feb 2024 14:40:30 -0300 Subject: [PATCH 15/29] Add PR comments --- include/nbl/video/alloc/SubAllocatedDescriptorSet.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index c15a1a3f05..041fac611b 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -221,6 +221,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted writes.push_back(write); } + // TODO: this goes outside m_logicalDevice->updateDescriptorSets(writes, {}); return unallocatedSize; } @@ -279,6 +280,11 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted allocator->free_addr(addr[i],1); // TODO: should also write something to the descriptor sets + // basically if nullDescriptor device feature is enabled, you would + // indeed write to the DS, else you'd just drop the refcounted references + // + // this needs to be done as a IGPUDescriptorSet::nullify(const uint32_t binding, + // std::span indices) function + a virtual nullify_impl } } From b326ca738654aa242518b90a0cbc7778046813b8 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Thu, 29 Feb 2024 19:22:08 -0300 Subject: [PATCH 16/29] Work on nullifying descriptors --- include/nbl/video/IGPUDescriptorSet.h | 9 ++++++++ include/nbl/video/ILogicalDevice.h | 7 ++++++ .../video/alloc/SubAllocatedDescriptorSet.h | 6 +++++ src/nbl/video/CVulkanLogicalDevice.cpp | 8 +++++++ src/nbl/video/CVulkanLogicalDevice.h | 1 + src/nbl/video/IGPUDescriptorSet.cpp | 23 +++++++++++++++++++ src/nbl/video/ILogicalDevice.cpp | 8 +++++++ 7 files changed, 62 insertions(+) diff --git a/include/nbl/video/IGPUDescriptorSet.h b/include/nbl/video/IGPUDescriptorSet.h index 87b1190d40..6f141b6c0e 100644 --- a/include/nbl/video/IGPUDescriptorSet.h +++ b/include/nbl/video/IGPUDescriptorSet.h @@ -45,6 +45,14 @@ class IGPUDescriptorSet : public asset::IDescriptorSet dropDescriptors); + //! Renderpasses and Framebuffers core::smart_refctd_ptr createRenderpass(const IGPURenderpass::SCreationParams& params); inline core::smart_refctd_ptr createFramebuffer(IGPUFramebuffer::SCreationParams&& params) @@ -836,6 +839,10 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe }; virtual void updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) = 0; + // Drops refcounted references of the descriptors in these indices for the descriptor lifetime tracking + // If the nullDescriptor device feature is enabled, this would also write a null descriptor to the descriptor set + virtual void nullifyDescriptors_impl(const std::span dropDescriptors) = 0; + virtual core::smart_refctd_ptr createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) = 0; virtual core::smart_refctd_ptr createFramebuffer_impl(IGPUFramebuffer::SCreationParams&& params) = 0; diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 041fac611b..acfb0a7ed8 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -285,6 +285,12 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // // this needs to be done as a IGPUDescriptorSet::nullify(const uint32_t binding, // std::span indices) function + a virtual nullify_impl + video::IGPUDescriptorSet::SDropDescriptorSet dropDescriptorSet; + dropDescriptorSet.dstSet = m_descriptorSet.get(); + dropDescriptorSet.binding = binding; + dropDescriptorSet.arrayElement = i; + dropDescriptorSet.count = 1; + m_logicalDevice->nullifyDescriptors({ &dropDescriptorSet, 1 }); } } diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 91d158b0ea..e124bb669b 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -717,6 +717,14 @@ void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSets m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),vk_copyDescriptorSets.size(),vk_copyDescriptorSets.data()); } +void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span dropDescriptors) +{ + if (getEnabledFeatures().nullDescriptor) + { + // TODO: Write null descriptors here + } +} + core::smart_refctd_ptr CVulkanLogicalDevice::createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) { diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index 65489d9c53..dc9efc1611 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -281,6 +281,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice // descriptor sets core::smart_refctd_ptr createDescriptorPool_impl(const IDescriptorPool::SCreateInfo& createInfo) override; void updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) override; + void nullifyDescriptors_impl(const std::span dropDescriptors) override; // renderpasses and framebuffers core::smart_refctd_ptr createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) override; diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp index 29b4eaab08..6ef0fa3578 100644 --- a/src/nbl/video/IGPUDescriptorSet.cpp +++ b/src/nbl/video/IGPUDescriptorSet.cpp @@ -124,6 +124,29 @@ void IGPUDescriptorSet::processWrite(const IGPUDescriptorSet::SWriteDescriptorSe incrementVersion(); } +void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop) +{ + assert(drop.dstSet == this); + + for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) + { + const auto type = static_cast(t); + + auto* dstDescriptors = drop.dstSet->getDescriptors(type, drop.binding); + auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding); + + if (dstDescriptors) + for (uint32_t i = 0; i < drop.count; i++) + dstDescriptors[drop.arrayElement + i] = nullptr; + + if (dstSamplers) + for (uint32_t i = 0; i < drop.count; i++) + dstSamplers[drop.arrayElement + i] = nullptr; + } + + incrementVersion(); +} + bool IGPUDescriptorSet::validateCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy) const { assert(copy.dstSet == this); diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 97030ccbba..649154403b 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -420,6 +420,14 @@ bool ILogicalDevice::updateDescriptorSets(const std::span dropDescriptors) +{ + for (const auto& drop : dropDescriptors) + drop.dstSet->dropDescriptors(drop); + + nullifyDescriptors_impl(dropDescriptors); +} + core::smart_refctd_ptr ILogicalDevice::createRenderpass(const IGPURenderpass::SCreationParams& params) { IGPURenderpass::SCreationParamValidationResult validation = IGPURenderpass::validateCreationParams(params); From 894a47cd862f678c92991a1ef1861920b4d91f6d Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 4 Mar 2024 18:57:24 -0300 Subject: [PATCH 17/29] Keep descriptor writes outside the allocate function --- .../video/alloc/SubAllocatedDescriptorSet.h | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index acfb0a7ed8..4c3c2f2497 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -180,7 +180,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); } //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` - inline size_type try_multi_allocate(uint32_t binding, size_type count, video::IGPUDescriptorSet::SDescriptorInfo* descriptors, value_type* outAddresses) + inline size_type try_multi_allocate( + uint32_t binding, + size_type count, + video::IGPUDescriptorSet::SDescriptorInfo* descriptors, + video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites, + value_type* outAddresses + ) { auto debugGuard = stAccessVerifyDebugGuard(); @@ -217,17 +223,21 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // can we change it? write.info = &descriptor; } - infos.push_back(descriptor); - writes.push_back(write); + outDescriptorWrites[i] = write; } - // TODO: this goes outside - m_logicalDevice->updateDescriptorSets(writes, {}); return unallocatedSize; } template - inline size_type multi_allocate(const std::chrono::time_point& maxWaitPoint, uint32_t binding, size_type count, video::IGPUDescriptorSet::SDescriptorInfo* descriptors, value_type* outAddresses) noexcept + inline size_type multi_allocate( + const std::chrono::time_point& maxWaitPoint, + uint32_t binding, + size_type count, + video::IGPUDescriptorSet::SDescriptorInfo* descriptors, + video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites, + value_type* outAddresses + ) noexcept { auto debugGuard = stAccessVerifyDebugGuard(); @@ -239,7 +249,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted auto& eventHandler = range->second.eventHandler; // try allocate once - size_type unallocatedSize = try_multi_allocate(binding, count, descriptors, outAddresses); + size_type unallocatedSize = try_multi_allocate(binding, count, descriptors, outDescriptorWrites, outAddresses); if (!unallocatedSize) return 0u; @@ -248,7 +258,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted { eventHandler.wait(maxWaitPoint, unallocatedSize); - unallocatedSize = try_multi_allocate(binding, unallocatedSize, &descriptors[count - unallocatedSize], &outAddresses[count - unallocatedSize]); + unallocatedSize = try_multi_allocate( + binding, + unallocatedSize, + &descriptors[count - unallocatedSize], + &outDescriptorWrites[count - unallocatedSize], + &outAddresses[count - unallocatedSize] + ); if (!unallocatedSize) return 0u; } while(Clock::now() Date: Mon, 4 Mar 2024 19:14:32 -0300 Subject: [PATCH 18/29] Include exporting of allocate descriptor writes instead of using them --- include/nbl/video/alloc/SubAllocatedDescriptorSet.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 4c3c2f2497..87ac6d1e05 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -28,8 +28,10 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted { public: inline DeferredFreeFunctor(SubAllocatedDescriptorSet* composed, uint32_t binding, size_type count, const value_type* addresses) - : m_addresses(addresses, addresses + count), m_binding(binding), m_composed(composed) + : m_addresses(std::move(core::make_refctd_dynamic_array>(count))), + m_binding(binding), m_composed(composed) { + memcpy(m_addresses->data(), addresses, count * sizeof(value_type)); } // Just does the de-allocation @@ -39,7 +41,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted #ifdef _NBL_DEBUG assert(m_composed); #endif // _NBL_DEBUG - m_composed->multi_deallocate(m_binding, m_addresses.size(), m_addresses.data()); + m_composed->multi_deallocate(m_binding, m_addresses->size(), m_addresses->data()); } // Takes count of allocations we want to free up as reference, true is returned if @@ -47,9 +49,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // False is returned if there are more allocations to free up inline bool operator()(size_type& allocationsToFreeUp) { - auto prevCount = m_addresses.size(); + auto prevCount = m_addresses->size(); operator()(); - auto totalFreed = m_addresses.size() - prevCount; + auto totalFreed = m_addresses->size() - prevCount; // This does the same logic as bool operator()(size_type&) on // CAsyncSingleBufferSubAllocator @@ -60,9 +62,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted return freedEverything; } protected: + core::smart_refctd_dynamic_array m_addresses; SubAllocatedDescriptorSet* m_composed; uint32_t m_binding; - std::vector m_addresses; }; using EventHandler = MultiTimelineEventHandlerST; protected: From 54250a6c83cf054127dcd0feabd72a7a90bde3c4 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 4 Mar 2024 19:38:05 -0300 Subject: [PATCH 19/29] Update examples submodule --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 5a94b7ef78..04ca9e27dd 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 5a94b7ef784f1aa81905cc4a05aab2adc64576ed +Subproject commit 04ca9e27dd7980d552939d74021f2790eff17622 From 2c352893a778f7bd1a5dae0b5756aa9f41580081 Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Tue, 5 Mar 2024 10:17:03 +0100 Subject: [PATCH 20/29] Update SubAllocatedDescriptorSet.h Some suggestions to optimize and make stuff correct --- .../video/alloc/SubAllocatedDescriptorSet.h | 152 +++++++----------- 1 file changed, 59 insertions(+), 93 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 87ac6d1e05..03d22860e9 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -34,24 +34,27 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted memcpy(m_addresses->data(), addresses, count * sizeof(value_type)); } - // Just does the de-allocation - inline void operator()() + // + inline auto getWorstCaseCount() const {return m_addresses->size();} + + // Just does the de-allocation, note that the parameter is a reference + inline void operator()(IGPUDescriptorSet::SDropDescriptorSet* &outNullify) { - // isn't assert already debug-only? #ifdef _NBL_DEBUG assert(m_composed); #endif // _NBL_DEBUG - m_composed->multi_deallocate(m_binding, m_addresses->size(), m_addresses->data()); + outNullify = m_composed->multi_deallocate(outNullify, m_binding, m_addresses->size(), m_addresses->data()); + m_composed->m_totalDeferredFrees -= getWorstCaseCount(); } // Takes count of allocations we want to free up as reference, true is returned if // the amount of allocations freed was >= allocationsToFreeUp // False is returned if there are more allocations to free up - inline bool operator()(size_type& allocationsToFreeUp) + inline bool operator()(size_type& allocationsToFreeUp, IGPUDescriptorSet::SDropDescriptorSet* &outNullify) { - auto prevCount = m_addresses->size(); - operator()(); - auto totalFreed = m_addresses->size() - prevCount; + auto prevNullify = outNullify; + operator()(outNullify); + auto totalFreed = outNullify-prevNullify; // This does the same logic as bool operator()(size_type&) on // CAsyncSingleBufferSubAllocator @@ -63,7 +66,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted } protected: core::smart_refctd_dynamic_array m_addresses; - SubAllocatedDescriptorSet* m_composed; + SubAllocatedDescriptorSet* m_composed; // TODO: shouldn't be called `composed`, maybe `parent` or something uint32_t m_binding; }; using EventHandler = MultiTimelineEventHandlerST; @@ -93,6 +96,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted std::map m_allocatableRanges = {}; core::smart_refctd_ptr m_descriptorSet; core::smart_refctd_ptr m_logicalDevice; + value_type m_totalDeferredFrees = 0; #ifdef _NBL_DEBUG std::recursive_mutex stAccessVerfier; @@ -141,7 +145,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted m_descriptorSet = std::move(descriptorSet); } - ~SubAllocatedDescriptorSet() + inline ~SubAllocatedDescriptorSet() { for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) { @@ -155,9 +159,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted } // whether that binding index can be sub-allocated - bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); } + inline bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); } - AddressAllocator* getBindingAllocator(uint32_t binding) + inline AddressAllocator* getBindingAllocator(uint32_t binding) { auto range = m_allocatableRanges.find(binding); // Check if this binding has an allocator @@ -169,36 +173,26 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // main methods #ifdef _NBL_DEBUG - std::unique_lock stAccessVerifyDebugGuard() + inline std::unique_lock stAccessVerifyDebugGuard() { std::unique_lock tLock(stAccessVerfier,std::try_to_lock_t()); assert(tLock.owns_lock()); return tLock; } #else - bool stAccessVerifyDebugGuard() { return false; } + inline bool stAccessVerifyDebugGuard() { return false; } #endif - video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); } + inline video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); } //! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value` - inline size_type try_multi_allocate( - uint32_t binding, - size_type count, - video::IGPUDescriptorSet::SDescriptorInfo* descriptors, - video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites, - value_type* outAddresses - ) + inline size_type try_multi_allocate(const uint32_t binding, const size_type count, value_type* outAddresses) noexcept { auto debugGuard = stAccessVerifyDebugGuard(); + // we assume you've validated that the binding is allocatable before trying this auto allocator = getBindingAllocator(binding); - std::vector writes; - std::vector infos; - writes.reserve(count); - infos.reserve(count); - size_type unallocatedSize = 0u; for (size_type i=0; i - inline size_type multi_allocate( - const std::chrono::time_point& maxWaitPoint, - uint32_t binding, - size_type count, - video::IGPUDescriptorSet::SDescriptorInfo* descriptors, - video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites, - value_type* outAddresses - ) noexcept + inline size_type multi_allocate(const std::chrono::time_point& maxWaitPoint, const uint32_t binding, const size_type count, value_type* outAddresses) noexcept { auto debugGuard = stAccessVerifyDebugGuard(); @@ -248,76 +220,60 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted if (range == m_allocatableRanges.end()) return count; - auto& eventHandler = range->second.eventHandler; - // try allocate once - size_type unallocatedSize = try_multi_allocate(binding, count, descriptors, outDescriptorWrites, outAddresses); + size_type unallocatedSize = try_multi_allocate(binding,count,outAddresses); if (!unallocatedSize) return 0u; // then try to wait at least once and allocate + auto& eventHandler = range->second.eventHandler; + core::vector nulls(m_totalDeferredFrees); + auto outNulls = nulls.data(); do { - eventHandler.wait(maxWaitPoint, unallocatedSize); - - unallocatedSize = try_multi_allocate( - binding, - unallocatedSize, - &descriptors[count - unallocatedSize], - &outDescriptorWrites[count - unallocatedSize], - &outAddresses[count - unallocatedSize] - ); + eventHandler.wait(maxWaitPoint, unallocatedSize, outNulls); + + // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps + unallocatedSize = try_multi_allocate(binding,count,outAddresses); if (!unallocatedSize) - return 0u; + break; } while(Clock::now()nullifyDescriptors({nulls.data(),outNulls}); return unallocatedSize; } - inline size_type multi_allocate( - uint32_t binding, - size_type count, - video::IGPUDescriptorSet::SDescriptorInfo* descriptors, - video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites, - value_type* outAddresses - ) noexcept + // default timeout overload + inline size_type multi_allocate(const uint32_t binding, const size_type count, value_type* outAddresses) noexcept { - auto range = m_allocatableRanges.find(binding); - // Check if this binding has an allocator - if (range == m_allocatableRanges.end()) - return count; - - return multi_allocate(TimelineEventHandlerBase::default_wait(), binding, count, descriptors, outDescriptorWrites, outAddresses); + // check that the binding is allocatable is done inside anyway + return multi_allocate(TimelineEventHandlerBase::default_wait(), binding, count, outAddresses); } - inline void multi_deallocate(uint32_t binding, size_type count, const size_type* addr) + // Very explicit low level call you'd need to sync and drop descriptors by yourself + // Returns: the one-past the last `outNullify` write pointer, this allows you to work out how many descriptors were freed + inline void multi_deallocate(IGPUDescriptorSet::SDropDescriptorSet* outNullify, uint32_t binding, size_type count, const size_type* addr) { auto debugGuard = stAccessVerifyDebugGuard(); auto allocator = getBindingAllocator(binding); - if (!allocator) - return; + if (allocator) for (size_type i=0; ifree_addr(addr[i],1); - // TODO: should also write something to the descriptor sets - // basically if nullDescriptor device feature is enabled, you would - // indeed write to the DS, else you'd just drop the refcounted references - // - // this needs to be done as a IGPUDescriptorSet::nullify(const uint32_t binding, - // std::span indices) function + a virtual nullify_impl - video::IGPUDescriptorSet::SDropDescriptorSet dropDescriptorSet; - dropDescriptorSet.dstSet = m_descriptorSet.get(); - dropDescriptorSet.binding = binding; - dropDescriptorSet.arrayElement = i; - dropDescriptorSet.count = 1; - m_logicalDevice->nullifyDescriptors({ &dropDescriptorSet, 1 }); + outNullify->dstSet = m_descriptorSet.get(); + outNullify->binding = binding; + outNullify->arrayElement = i; + outNullify->count = 1; + outNullify++; } + return outNullify; } + // 100% will defer inline void multi_deallocate(uint32_t binding, const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept { auto range = m_allocatableRanges.find(binding); @@ -327,26 +283,36 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted auto& eventHandler = range->second.eventHandler; auto debugGuard = stAccessVerifyDebugGuard(); + m_totalDeferredFrees += functor.getWorstCaseCount(); eventHandler.latch(futureWait,std::move(functor)); } + // defers based on the conservative estimation if `futureWait` needs to be waited on, if doesn't will call nullify descriiptors internally immediately inline void multi_deallocate(uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept { if (futureWait.semaphore) multi_deallocate(binding, futureWait, DeferredFreeFunctor(this, binding, count, addr)); else - multi_deallocate(binding, count, addr); + { + core::vector nulls(count); + auto actualEnd = multi_deallocate(nulls.data(), binding, count, addr); + m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd}); + } } + //! Returns free events still outstanding inline uint32_t cull_frees() noexcept { auto debugGuard = stAccessVerifyDebugGuard(); uint32_t frees = 0; + core::vector nulls(m_totalDeferredFrees); + auto outNulls = nulls.data(); for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) { auto& it = m_allocatableRanges[i]; - frees += it.eventHandler.poll().eventsLeft; + frees += it.eventHandler.poll(outNulls).eventsLeft; } + m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}); return frees; } }; From a2e2be495730673f6d4d358b240fb2319f4c8b83 Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Tue, 5 Mar 2024 10:26:30 +0100 Subject: [PATCH 21/29] Forgot that the nullification needs to be done between event-wait and allocation --- include/nbl/video/alloc/SubAllocatedDescriptorSet.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 03d22860e9..a9d1548e48 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -147,6 +147,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted inline ~SubAllocatedDescriptorSet() { +static_assert(false, "should nullify/destroy/poll the event deferred handler to completion, then as we iterate through `m_allocatableRanges` assert that the allocators have no allocations/everything is free"); for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) { auto& range = m_allocatableRanges[i]; @@ -227,18 +228,20 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // then try to wait at least once and allocate auto& eventHandler = range->second.eventHandler; - core::vector nulls(m_totalDeferredFrees); - auto outNulls = nulls.data(); + core::vector nulls; do { + // FUTURE TODO: later we could only nullify the descriptors we don't end up reallocating if without robustness features + nulls.resize(m_totalDeferredFrees); + auto outNulls = nulls.data(); eventHandler.wait(maxWaitPoint, unallocatedSize, outNulls); + m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}); // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps unallocatedSize = try_multi_allocate(binding,count,outAddresses); if (!unallocatedSize) break; } while(Clock::now()nullifyDescriptors({nulls.data(),outNulls}); return unallocatedSize; } From bc5b22d277d1934f2c763847b54177974298cb89 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Thu, 7 Mar 2024 23:55:28 -0300 Subject: [PATCH 22/29] PR review and fix compilation errors --- examples_tests | 2 +- .../video/alloc/SubAllocatedDescriptorSet.h | 30 +++++++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/examples_tests b/examples_tests index 04ca9e27dd..9b6764f885 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 04ca9e27dd7980d552939d74021f2790eff17622 +Subproject commit 9b6764f885374b97c841f6f9e188505ca9d24601 diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index a9d1548e48..48bc01047d 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -33,6 +33,10 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted { memcpy(m_addresses->data(), addresses, count * sizeof(value_type)); } + inline DeferredFreeFunctor(DeferredFreeFunctor&& other) + { + operator=(std::move(other)); + } // inline auto getWorstCaseCount() const {return m_addresses->size();} @@ -47,6 +51,28 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted m_composed->m_totalDeferredFrees -= getWorstCaseCount(); } + DeferredFreeFunctor(const DeferredFreeFunctor& other) = delete; + DeferredFreeFunctor& operator=(const DeferredFreeFunctor& other) = delete; + inline DeferredFreeFunctor& operator=(DeferredFreeFunctor&& other) + { + m_composed = other.m_composed; + m_addresses = other.m_addresses; + m_binding = other.m_binding; + return *this; + } + + // This is needed for the destructor of TimelineEventHandlerST + // Don't call this directly + // TODO: Find a workaround for this + inline void operator()() + { + core::vector nulls(m_addresses->size()); + auto ptr = nulls.data(); + operator()(ptr); + auto size = ptr - nulls.data(); + m_composed->m_logicalDevice->nullifyDescriptors({nulls.data(),size_type(size)}); + } + // Takes count of allocations we want to free up as reference, true is returned if // the amount of allocations freed was >= allocationsToFreeUp // False is returned if there are more allocations to free up @@ -147,7 +173,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted inline ~SubAllocatedDescriptorSet() { -static_assert(false, "should nullify/destroy/poll the event deferred handler to completion, then as we iterate through `m_allocatableRanges` assert that the allocators have no allocations/everything is free"); + // TODO: should nullify/destroy/poll the event deferred handler to completion, then as we iterate through `m_allocatableRanges` assert that the allocators have no allocations/everything is free for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) { auto& range = m_allocatableRanges[i]; @@ -255,7 +281,7 @@ static_assert(false, "should nullify/destroy/poll the event deferred handler to // Very explicit low level call you'd need to sync and drop descriptors by yourself // Returns: the one-past the last `outNullify` write pointer, this allows you to work out how many descriptors were freed - inline void multi_deallocate(IGPUDescriptorSet::SDropDescriptorSet* outNullify, uint32_t binding, size_type count, const size_type* addr) + inline IGPUDescriptorSet::SDropDescriptorSet* multi_deallocate(IGPUDescriptorSet::SDropDescriptorSet* outNullify, uint32_t binding, size_type count, const size_type* addr) { auto debugGuard = stAccessVerifyDebugGuard(); From 912ed7abe307dc7cebb6ff1c47fd89a02299f488 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Sun, 10 Mar 2024 21:27:41 -0300 Subject: [PATCH 23/29] PR reviews & nullifying descriptors --- include/nbl/video/ILogicalDevice.h | 4 +- .../video/alloc/SubAllocatedDescriptorSet.h | 37 +++++++---- src/nbl/video/CVulkanLogicalDevice.cpp | 66 ++++++++++++++++++- src/nbl/video/CVulkanLogicalDevice.h | 2 +- src/nbl/video/IGPUDescriptorSet.cpp | 10 ++- src/nbl/video/ILogicalDevice.cpp | 13 +++- 6 files changed, 111 insertions(+), 21 deletions(-) diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 9537ad29da..16176ef880 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -633,7 +633,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe } // should this be joined together with the existing updateDescriptorSets? - void nullifyDescriptors(const std::span dropDescriptors); + bool nullifyDescriptors(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType); //! Renderpasses and Framebuffers core::smart_refctd_ptr createRenderpass(const IGPURenderpass::SCreationParams& params); @@ -853,7 +853,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe // Drops refcounted references of the descriptors in these indices for the descriptor lifetime tracking // If the nullDescriptor device feature is enabled, this would also write a null descriptor to the descriptor set - virtual void nullifyDescriptors_impl(const std::span dropDescriptors) = 0; + virtual void nullifyDescriptors_impl(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) = 0; virtual core::smart_refctd_ptr createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) = 0; virtual core::smart_refctd_ptr createFramebuffer_impl(IGPUFramebuffer::SCreationParams&& params) = 0; diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 48bc01047d..bd6a1dc454 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -66,11 +66,12 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // TODO: Find a workaround for this inline void operator()() { - core::vector nulls(m_addresses->size()); - auto ptr = nulls.data(); - operator()(ptr); - auto size = ptr - nulls.data(); - m_composed->m_logicalDevice->nullifyDescriptors({nulls.data(),size_type(size)}); + assert(false); // This should not be called, timeline needs to be drained before destructor + // core::vector nulls(m_addresses->size()); + // auto ptr = nulls.data(); + // operator()(ptr); + // auto size = ptr - nulls.data(); + // m_composed->m_logicalDevice->nullifyDescriptors({nulls.data(),size_type(size)}); } // Takes count of allocations we want to free up as reference, true is returned if @@ -102,13 +103,17 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted std::unique_ptr addressAllocator = nullptr; std::unique_ptr reservedAllocator = nullptr; size_t reservedSize = 0; + asset::IDescriptor::E_TYPE descriptorType = asset::IDescriptor::E_TYPE::ET_COUNT; SubAllocDescriptorSetRange( std::unique_ptr&& inAddressAllocator, std::unique_ptr&& inReservedAllocator, - size_t inReservedSize) : + size_t inReservedSize, + asset::IDescriptor::E_TYPE inDescriptorType) : eventHandler({}), addressAllocator(std::move(inAddressAllocator)), - reservedAllocator(std::move(inReservedAllocator)), reservedSize(inReservedSize) {} + reservedAllocator(std::move(inReservedAllocator)), + reservedSize(inReservedSize), + descriptorType(inDescriptorType) {} SubAllocDescriptorSetRange() {} SubAllocDescriptorSetRange& operator=(SubAllocDescriptorSetRange&& other) @@ -116,6 +121,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted addressAllocator = std::move(other.addressAllocator); reservedAllocator = std::move(other.reservedAllocator); reservedSize = other.reservedSize; + descriptorType = other.descriptorType; + + // Nullify other + other.addressAllocator = nullptr; + other.reservedAllocator = nullptr; + other.reservedSize = 0u; + other.descriptorType = asset::IDescriptor::E_TYPE::ET_COUNT; return *this; } }; @@ -164,7 +176,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted MinDescriptorSetAllocationSize )); - m_allocatableRanges[binding.data] = SubAllocDescriptorSetRange(std::move(addressAllocator), std::move(reservedAllocator), reservedSize); + m_allocatableRanges[binding.data] = SubAllocDescriptorSetRange(std::move(addressAllocator), std::move(reservedAllocator), reservedSize, descType); } } } @@ -261,7 +273,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted nulls.resize(m_totalDeferredFrees); auto outNulls = nulls.data(); eventHandler.wait(maxWaitPoint, unallocatedSize, outNulls); - m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}); + m_logicalDevice->nullifyDescriptors({ nulls.data(),outNulls }, range->second.descriptorType); // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps unallocatedSize = try_multi_allocate(binding,count,outAddresses); @@ -325,7 +337,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted { core::vector nulls(count); auto actualEnd = multi_deallocate(nulls.data(), binding, count, addr); - m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd}); + // This is checked to be valid above + auto range = m_allocatableRanges.find(binding); + m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd}, range->second.descriptorType); } } @@ -340,8 +354,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted { auto& it = m_allocatableRanges[i]; frees += it.eventHandler.poll(outNulls).eventsLeft; + // TODO: this could be optimized to be put outside the loop + m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}, it.descriptorType); } - m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}); return frees; } }; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 141d3a5ee1..7340893d4d 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -727,11 +727,73 @@ void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSets m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),vk_copyDescriptorSets.size(),vk_copyDescriptorSets.data()); } -void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span dropDescriptors) +void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span drops, asset::IDescriptor::E_TYPE descriptorType) { if (getEnabledFeatures().nullDescriptor) { - // TODO: Write null descriptors here + core::vector vk_writeDescriptorSets(drops.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr}); + core::vector vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr}); + + size_t maxSize = 0; + for (auto i = 0; i < drops.size(); i++) + { + const auto& write = drops[i]; + maxSize = core::max(maxSize, write.count); + } + size_t descriptorSize; + switch (asset::IDescriptor::GetTypeCategory(descriptorType)) + { + case asset::IDescriptor::EC_BUFFER: + descriptorSize = sizeof(VkDescriptorBufferInfo); + break; + case asset::IDescriptor::EC_IMAGE: + descriptorSize = sizeof(VkDescriptorImageInfo); + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + descriptorSize = sizeof(VkBufferView); + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + descriptorSize = sizeof(VkAccelerationStructureKHR); + break; + } + + core::vector nullDescriptors(maxSize * descriptorSize, 0u); + + { + auto outWrite = vk_writeDescriptorSets.data(); + auto outWriteAS = vk_writeDescriptorSetAS.data(); + + for (auto i=0; idstSet = static_cast(write.dstSet)->getInternalObject(); + outWrite->dstBinding = write.binding; + outWrite->dstArrayElement = write.arrayElement; + outWrite->descriptorType = getVkDescriptorTypeFromDescriptorType(descriptorType); + outWrite->descriptorCount = write.count; + switch (asset::IDescriptor::GetTypeCategory(descriptorType)) + { + case asset::IDescriptor::EC_BUFFER: + outWrite->pBufferInfo = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_IMAGE: + outWrite->pImageInfo = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + outWrite->pTexelBufferView = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + outWriteAS->accelerationStructureCount = write.count; + outWriteAS->pAccelerationStructures = reinterpret_cast(nullDescriptors.data()); + break; + default: + assert(!"Invalid code path."); + } + outWrite++; + } + } + m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),0,nullptr); } } diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index b35fa7459c..c4161abe4f 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -282,7 +282,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice // descriptor sets core::smart_refctd_ptr createDescriptorPool_impl(const IDescriptorPool::SCreateInfo& createInfo) override; void updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) override; - void nullifyDescriptors_impl(const std::span dropDescriptors) override; + void nullifyDescriptors_impl(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) override; // renderpasses and framebuffers core::smart_refctd_ptr createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) override; diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp index 6ef0fa3578..1896a168d6 100644 --- a/src/nbl/video/IGPUDescriptorSet.cpp +++ b/src/nbl/video/IGPUDescriptorSet.cpp @@ -134,6 +134,8 @@ void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptor auto* dstDescriptors = drop.dstSet->getDescriptors(type, drop.binding); auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding); + assert(dstDescriptors); + assert(dstSamplers); if (dstDescriptors) for (uint32_t i = 0; i < drop.count; i++) @@ -143,8 +145,12 @@ void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptor for (uint32_t i = 0; i < drop.count; i++) dstSamplers[drop.arrayElement + i] = nullptr; } - - incrementVersion(); + // we only increment the version to detect UPDATE-AFTER-BIND and automagically invalidate descriptor sets + // so, only if we do the path that writes descriptors, do we want to increment version + if (getOriginDevice()->getEnabledFeatures().nullDescriptor) + { + incrementVersion(); + } } bool IGPUDescriptorSet::validateCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy) const diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 7814a06dd9..2b35fff3cd 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -444,12 +444,19 @@ bool ILogicalDevice::updateDescriptorSets(const std::span dropDescriptors) +bool ILogicalDevice::nullifyDescriptors(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) { for (const auto& drop : dropDescriptors) - drop.dstSet->dropDescriptors(drop); + { + auto ds = drop.dstSet; + if (!ds || !ds->wasCreatedBy(this)) + return false; - nullifyDescriptors_impl(dropDescriptors); + ds->dropDescriptors(drop); + } + + nullifyDescriptors_impl(dropDescriptors, descriptorType); + return true; } core::smart_refctd_ptr ILogicalDevice::createRenderpass(const IGPURenderpass::SCreationParams& params) From 89e64406754ae10fb98de32fc4cf0e08b95033e5 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 12 Mar 2024 00:39:51 -0300 Subject: [PATCH 24/29] Fix multi timeline functionality --- include/nbl/video/IGPUDescriptorSet.h | 2 +- .../video/alloc/SubAllocatedDescriptorSet.h | 42 +++++++++++-------- src/nbl/video/IGPUDescriptorSet.cpp | 28 ++++++------- src/nbl/video/ILogicalDevice.cpp | 2 +- 4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/include/nbl/video/IGPUDescriptorSet.h b/include/nbl/video/IGPUDescriptorSet.h index 6f141b6c0e..33957a542d 100644 --- a/include/nbl/video/IGPUDescriptorSet.h +++ b/include/nbl/video/IGPUDescriptorSet.h @@ -69,7 +69,7 @@ class IGPUDescriptorSet : public asset::IDescriptorSet; protected: struct SubAllocDescriptorSetRange { - EventHandler eventHandler = EventHandler({}); + std::unique_ptr eventHandler = nullptr; std::unique_ptr addressAllocator = nullptr; std::unique_ptr reservedAllocator = nullptr; size_t reservedSize = 0; asset::IDescriptor::E_TYPE descriptorType = asset::IDescriptor::E_TYPE::ET_COUNT; SubAllocDescriptorSetRange( + std::unique_ptr&& inEventHandler, std::unique_ptr&& inAddressAllocator, std::unique_ptr&& inReservedAllocator, size_t inReservedSize, asset::IDescriptor::E_TYPE inDescriptorType) : - eventHandler({}), addressAllocator(std::move(inAddressAllocator)), + eventHandler(std::move(inEventHandler)), addressAllocator(std::move(inAddressAllocator)), reservedAllocator(std::move(inReservedAllocator)), reservedSize(inReservedSize), descriptorType(inDescriptorType) {} @@ -118,12 +119,14 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted SubAllocDescriptorSetRange& operator=(SubAllocDescriptorSetRange&& other) { + eventHandler = std::move(other.eventHandler); addressAllocator = std::move(other.addressAllocator); reservedAllocator = std::move(other.reservedAllocator); reservedSize = other.reservedSize; descriptorType = other.descriptorType; // Nullify other + other.eventHandler = nullptr; other.addressAllocator = nullptr; other.reservedAllocator = nullptr; other.reservedSize = 0u; @@ -147,7 +150,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // constructors inline SubAllocatedDescriptorSet(core::smart_refctd_ptr&& descriptorSet, - core::smart_refctd_ptr&& logicalDevice) : m_logicalDevice(std::move(logicalDevice)) + core::smart_refctd_ptr&& logicalDevice) { auto layout = descriptorSet->getLayout(); for (uint32_t descriptorType = 0; descriptorType < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++) @@ -175,12 +178,15 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted static_cast(0), 0u, MaxDescriptorSetAllocationAlignment, static_cast(count), MinDescriptorSetAllocationSize )); + auto eventHandler = std::unique_ptr(new EventHandler(core::smart_refctd_ptr(logicalDevice))); - m_allocatableRanges[binding.data] = SubAllocDescriptorSetRange(std::move(addressAllocator), std::move(reservedAllocator), reservedSize, descType); + m_allocatableRanges[binding.data] = SubAllocDescriptorSetRange(std::move(eventHandler), std::move(addressAllocator), std::move(reservedAllocator), reservedSize, descType); + assert(m_allocatableRanges[binding.data].eventHandler->getLogicalDevice()); } } } m_descriptorSet = std::move(descriptorSet); + m_logicalDevice = std::move(logicalDevice); } inline ~SubAllocatedDescriptorSet() @@ -272,7 +278,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted // FUTURE TODO: later we could only nullify the descriptors we don't end up reallocating if without robustness features nulls.resize(m_totalDeferredFrees); auto outNulls = nulls.data(); - eventHandler.wait(maxWaitPoint, unallocatedSize, outNulls); + eventHandler->wait(maxWaitPoint, unallocatedSize, outNulls); m_logicalDevice->nullifyDescriptors({ nulls.data(),outNulls }, range->second.descriptorType); // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps @@ -299,17 +305,19 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted auto allocator = getBindingAllocator(binding); if (allocator) - for (size_type i=0; ifree_addr(addr[i],1); - outNullify->dstSet = m_descriptorSet.get(); - outNullify->binding = binding; - outNullify->arrayElement = i; - outNullify->count = 1; - outNullify++; + for (size_type i = 0; i < count; i++) + { + if (addr[i] == AddressAllocator::invalid_address) + continue; + + allocator->free_addr(addr[i], 1); + outNullify->dstSet = m_descriptorSet.get(); + outNullify->binding = binding; + outNullify->arrayElement = i; + outNullify->count = 1; + outNullify++; + } } return outNullify; } @@ -325,7 +333,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted auto& eventHandler = range->second.eventHandler; auto debugGuard = stAccessVerifyDebugGuard(); m_totalDeferredFrees += functor.getWorstCaseCount(); - eventHandler.latch(futureWait,std::move(functor)); + eventHandler->latch(futureWait,std::move(functor)); } // defers based on the conservative estimation if `futureWait` needs to be waited on, if doesn't will call nullify descriiptors internally immediately @@ -353,7 +361,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) { auto& it = m_allocatableRanges[i]; - frees += it.eventHandler.poll(outNulls).eventsLeft; + frees += it.eventHandler->poll(outNulls).eventsLeft; // TODO: this could be optimized to be put outside the loop m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}, it.descriptorType); } diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp index 1896a168d6..cedc20bed2 100644 --- a/src/nbl/video/IGPUDescriptorSet.cpp +++ b/src/nbl/video/IGPUDescriptorSet.cpp @@ -124,27 +124,25 @@ void IGPUDescriptorSet::processWrite(const IGPUDescriptorSet::SWriteDescriptorSe incrementVersion(); } -void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop) +void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop, const asset::IDescriptor::E_TYPE type) { assert(drop.dstSet == this); - for (uint32_t t = 0; t < static_cast(asset::IDescriptor::E_TYPE::ET_COUNT); ++t) - { - const auto type = static_cast(t); + auto* dstDescriptors = drop.dstSet->getDescriptors(type, drop.binding); + auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding); + assert(dstDescriptors); + // Samplers are only used in the combined image sampler descriptors + if (type == asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER) + assert(dstSamplers); - auto* dstDescriptors = drop.dstSet->getDescriptors(type, drop.binding); - auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding); - assert(dstDescriptors); - assert(dstSamplers); + if (dstDescriptors) + for (uint32_t i = 0; i < drop.count; i++) + dstDescriptors[drop.arrayElement + i] = nullptr; - if (dstDescriptors) - for (uint32_t i = 0; i < drop.count; i++) - dstDescriptors[drop.arrayElement + i] = nullptr; + if (dstSamplers) + for (uint32_t i = 0; i < drop.count; i++) + dstSamplers[drop.arrayElement + i] = nullptr; - if (dstSamplers) - for (uint32_t i = 0; i < drop.count; i++) - dstSamplers[drop.arrayElement + i] = nullptr; - } // we only increment the version to detect UPDATE-AFTER-BIND and automagically invalidate descriptor sets // so, only if we do the path that writes descriptors, do we want to increment version if (getOriginDevice()->getEnabledFeatures().nullDescriptor) diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 2b35fff3cd..697f7db7c5 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -452,7 +452,7 @@ bool ILogicalDevice::nullifyDescriptors(const std::spanwasCreatedBy(this)) return false; - ds->dropDescriptors(drop); + ds->dropDescriptors(drop, descriptorType); } nullifyDescriptors_impl(dropDescriptors, descriptorType); From ede586fdf079ce5aa8ce8991d5745113ccfd13af Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 12 Mar 2024 00:39:56 -0300 Subject: [PATCH 25/29] Update example --- examples_tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests b/examples_tests index 9b6764f885..7906d1cac9 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 9b6764f885374b97c841f6f9e188505ca9d24601 +Subproject commit 7906d1cac91881862aca8295bab0726f76350fec From 55319038d7aeedeff2e3f490c3a48697da66727c Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 12 Mar 2024 19:29:07 -0300 Subject: [PATCH 26/29] Implement depletion of sub alloc descriptor set --- .../video/alloc/SubAllocatedDescriptorSet.h | 7 +- src/nbl/video/CVulkanLogicalDevice.cpp | 120 +++++++++--------- src/nbl/video/IGPUDescriptorSet.cpp | 4 - 3 files changed, 67 insertions(+), 64 deletions(-) diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index 58cc473159..be20dd69b1 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -191,12 +191,17 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted inline ~SubAllocatedDescriptorSet() { - // TODO: should nullify/destroy/poll the event deferred handler to completion, then as we iterate through `m_allocatableRanges` assert that the allocators have no allocations/everything is free + uint32_t remainingFrees; + do { + remainingFrees = cull_frees(); + } while (remainingFrees > 0); + for (uint32_t i = 0; i < m_allocatableRanges.size(); i++) { auto& range = m_allocatableRanges[i]; if (range.reservedSize == 0) continue; + assert(range.eventHandler->getTimelines().size() == 0); auto ptr = reinterpret_cast(core::address_allocator_traits::getReservedSpacePtr(*range.addressAllocator)); range.addressAllocator = nullptr; range.reservedAllocator->deallocate(const_cast(ptr), range.reservedSize); diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 7340893d4d..8ccf264aa5 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -731,70 +731,72 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span vk_writeDescriptorSets(drops.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr}); - core::vector vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr}); - - size_t maxSize = 0; - for (auto i = 0; i < drops.size(); i++) - { - const auto& write = drops[i]; - maxSize = core::max(maxSize, write.count); - } - size_t descriptorSize; - switch (asset::IDescriptor::GetTypeCategory(descriptorType)) - { - case asset::IDescriptor::EC_BUFFER: - descriptorSize = sizeof(VkDescriptorBufferInfo); - break; - case asset::IDescriptor::EC_IMAGE: - descriptorSize = sizeof(VkDescriptorImageInfo); - break; - case asset::IDescriptor::EC_BUFFER_VIEW: - descriptorSize = sizeof(VkBufferView); - break; - case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: - descriptorSize = sizeof(VkAccelerationStructureKHR); - break; - } - - core::vector nullDescriptors(maxSize * descriptorSize, 0u); + return + } + core::vector vk_writeDescriptorSets(drops.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr}); + core::vector vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr}); + + size_t maxSize = 0; + for (auto i = 0; i < drops.size(); i++) + { + const auto& write = drops[i]; + maxSize = core::max(maxSize, write.count); + } + size_t descriptorSize; + switch (asset::IDescriptor::GetTypeCategory(descriptorType)) + { + case asset::IDescriptor::EC_BUFFER: + descriptorSize = sizeof(VkDescriptorBufferInfo); + break; + case asset::IDescriptor::EC_IMAGE: + descriptorSize = sizeof(VkDescriptorImageInfo); + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + descriptorSize = sizeof(VkBufferView); + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + descriptorSize = sizeof(VkAccelerationStructureKHR); + break; + } + + core::vector nullDescriptors(maxSize * descriptorSize, 0u); + + { + auto outWrite = vk_writeDescriptorSets.data(); + auto outWriteAS = vk_writeDescriptorSetAS.data(); + + for (auto i=0; idstSet = static_cast(write.dstSet)->getInternalObject(); + outWrite->dstBinding = write.binding; + outWrite->dstArrayElement = write.arrayElement; + outWrite->descriptorType = getVkDescriptorTypeFromDescriptorType(descriptorType); + outWrite->descriptorCount = write.count; + switch (asset::IDescriptor::GetTypeCategory(descriptorType)) { - const auto& write = drops[i]; - - outWrite->dstSet = static_cast(write.dstSet)->getInternalObject(); - outWrite->dstBinding = write.binding; - outWrite->dstArrayElement = write.arrayElement; - outWrite->descriptorType = getVkDescriptorTypeFromDescriptorType(descriptorType); - outWrite->descriptorCount = write.count; - switch (asset::IDescriptor::GetTypeCategory(descriptorType)) - { - case asset::IDescriptor::EC_BUFFER: - outWrite->pBufferInfo = reinterpret_cast(nullDescriptors.data()); - break; - case asset::IDescriptor::EC_IMAGE: - outWrite->pImageInfo = reinterpret_cast(nullDescriptors.data()); - break; - case asset::IDescriptor::EC_BUFFER_VIEW: - outWrite->pTexelBufferView = reinterpret_cast(nullDescriptors.data()); - break; - case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: - outWriteAS->accelerationStructureCount = write.count; - outWriteAS->pAccelerationStructures = reinterpret_cast(nullDescriptors.data()); - break; - default: - assert(!"Invalid code path."); - } - outWrite++; + case asset::IDescriptor::EC_BUFFER: + outWrite->pBufferInfo = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_IMAGE: + outWrite->pImageInfo = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + outWrite->pTexelBufferView = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + outWriteAS->accelerationStructureCount = write.count; + outWriteAS->pAccelerationStructures = reinterpret_cast(nullDescriptors.data()); + break; + default: + assert(!"Invalid code path."); } + outWrite++; } - m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),0,nullptr); - } + } + m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),0,nullptr); } diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp index cedc20bed2..0e13f8519a 100644 --- a/src/nbl/video/IGPUDescriptorSet.cpp +++ b/src/nbl/video/IGPUDescriptorSet.cpp @@ -130,10 +130,6 @@ void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptor auto* dstDescriptors = drop.dstSet->getDescriptors(type, drop.binding); auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding); - assert(dstDescriptors); - // Samplers are only used in the combined image sampler descriptors - if (type == asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER) - assert(dstSamplers); if (dstDescriptors) for (uint32_t i = 0; i < drop.count; i++) From 79c3a23e2831dd1c96e21c841625c2f0441c6bf7 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 12 Mar 2024 23:48:39 -0300 Subject: [PATCH 27/29] Fix API for nullify --- include/nbl/video/IGPUDescriptorSet.h | 28 +++++++------ include/nbl/video/ILogicalDevice.h | 4 +- .../video/alloc/SubAllocatedDescriptorSet.h | 7 ++-- src/nbl/video/CVulkanLogicalDevice.cpp | 42 ++++++++++--------- src/nbl/video/CVulkanLogicalDevice.h | 2 +- src/nbl/video/IGPUDescriptorSet.cpp | 6 ++- src/nbl/video/ILogicalDevice.cpp | 10 +++-- 7 files changed, 54 insertions(+), 45 deletions(-) diff --git a/include/nbl/video/IGPUDescriptorSet.h b/include/nbl/video/IGPUDescriptorSet.h index 33957a542d..f81f8190bf 100644 --- a/include/nbl/video/IGPUDescriptorSet.h +++ b/include/nbl/video/IGPUDescriptorSet.h @@ -57,6 +57,20 @@ class IGPUDescriptorSet : public asset::IDescriptorSet(asset::IDescriptor::E_TYPE::ET_COUNT); t++) + { + const auto type = static_cast(t); + const auto& bindingRedirect = getLayout()->getDescriptorRedirect(type); + if (bindingRedirect.getStorageOffset(redirect_t::binding_number_t{binding}).data!=redirect_t::Invalid) + return type; + } + return asset::IDescriptor::E_TYPE::ET_COUNT; + } + protected: IGPUDescriptorSet(core::smart_refctd_ptr&& _layout, core::smart_refctd_ptr&& pool, IDescriptorPool::SStorageOffsets&& offsets); virtual ~IGPUDescriptorSet(); @@ -69,7 +83,7 @@ class IGPUDescriptorSet : public asset::IDescriptorSet(asset::IDescriptor::E_TYPE::ET_COUNT); t++) - { - const auto type = static_cast(t); - const auto& bindingRedirect = getLayout()->getDescriptorRedirect(type); - if (bindingRedirect.getStorageOffset(redirect_t::binding_number_t{binding}).data!=redirect_t::Invalid) - return type; - } - return asset::IDescriptor::E_TYPE::ET_COUNT; - } inline core::smart_refctd_ptr* getMutableSamplers(const uint32_t binding) const { diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 16176ef880..8b831fc3b5 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -633,7 +633,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe } // should this be joined together with the existing updateDescriptorSets? - bool nullifyDescriptors(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType); + bool nullifyDescriptors(const std::span dropDescriptors); //! Renderpasses and Framebuffers core::smart_refctd_ptr createRenderpass(const IGPURenderpass::SCreationParams& params); @@ -853,7 +853,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe // Drops refcounted references of the descriptors in these indices for the descriptor lifetime tracking // If the nullDescriptor device feature is enabled, this would also write a null descriptor to the descriptor set - virtual void nullifyDescriptors_impl(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) = 0; + virtual void nullifyDescriptors_impl(const std::span dropDescriptors) = 0; virtual core::smart_refctd_ptr createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) = 0; virtual core::smart_refctd_ptr createFramebuffer_impl(IGPUFramebuffer::SCreationParams&& params) = 0; diff --git a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h index be20dd69b1..601f0ee12a 100644 --- a/include/nbl/video/alloc/SubAllocatedDescriptorSet.h +++ b/include/nbl/video/alloc/SubAllocatedDescriptorSet.h @@ -284,7 +284,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted nulls.resize(m_totalDeferredFrees); auto outNulls = nulls.data(); eventHandler->wait(maxWaitPoint, unallocatedSize, outNulls); - m_logicalDevice->nullifyDescriptors({ nulls.data(),outNulls }, range->second.descriptorType); + m_logicalDevice->nullifyDescriptors({ nulls.data(),outNulls }); // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps unallocatedSize = try_multi_allocate(binding,count,outAddresses); @@ -352,7 +352,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted auto actualEnd = multi_deallocate(nulls.data(), binding, count, addr); // This is checked to be valid above auto range = m_allocatableRanges.find(binding); - m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd}, range->second.descriptorType); + m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd}); } } @@ -367,9 +367,8 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted { auto& it = m_allocatableRanges[i]; frees += it.eventHandler->poll(outNulls).eventsLeft; - // TODO: this could be optimized to be put outside the loop - m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}, it.descriptorType); } + m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}); return frees; } }; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 8ccf264aa5..b7eb7110c2 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -727,11 +727,11 @@ void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSets m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),vk_copyDescriptorSets.size(),vk_copyDescriptorSets.data()); } -void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span drops, asset::IDescriptor::E_TYPE descriptorType) +void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span drops) { if (getEnabledFeatures().nullDescriptor) { - return + return; } core::vector vk_writeDescriptorSets(drops.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr}); @@ -741,26 +741,27 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::spangetBindingType(write.binding); + size_t descriptorSize; + switch (asset::IDescriptor::GetTypeCategory(descriptorType)) + { + case asset::IDescriptor::EC_BUFFER: + descriptorSize = sizeof(VkDescriptorBufferInfo); + break; + case asset::IDescriptor::EC_IMAGE: + descriptorSize = sizeof(VkDescriptorImageInfo); + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + descriptorSize = sizeof(VkBufferView); + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + descriptorSize = sizeof(VkAccelerationStructureKHR); + break; + } + maxSize = core::max(maxSize, write.count * descriptorSize); } - core::vector nullDescriptors(maxSize * descriptorSize, 0u); + core::vector nullDescriptors(maxSize, 0u); { auto outWrite = vk_writeDescriptorSets.data(); @@ -769,6 +770,7 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::spangetBindingType(write.binding); outWrite->dstSet = static_cast(write.dstSet)->getInternalObject(); outWrite->dstBinding = write.binding; diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index c4161abe4f..b35fa7459c 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -282,7 +282,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice // descriptor sets core::smart_refctd_ptr createDescriptorPool_impl(const IDescriptorPool::SCreateInfo& createInfo) override; void updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) override; - void nullifyDescriptors_impl(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) override; + void nullifyDescriptors_impl(const std::span dropDescriptors) override; // renderpasses and framebuffers core::smart_refctd_ptr createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) override; diff --git a/src/nbl/video/IGPUDescriptorSet.cpp b/src/nbl/video/IGPUDescriptorSet.cpp index 0e13f8519a..e29211f019 100644 --- a/src/nbl/video/IGPUDescriptorSet.cpp +++ b/src/nbl/video/IGPUDescriptorSet.cpp @@ -124,11 +124,13 @@ void IGPUDescriptorSet::processWrite(const IGPUDescriptorSet::SWriteDescriptorSe incrementVersion(); } -void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop, const asset::IDescriptor::E_TYPE type) +void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop) { assert(drop.dstSet == this); - auto* dstDescriptors = drop.dstSet->getDescriptors(type, drop.binding); + const auto descriptorType = getBindingType(drop.binding); + + auto* dstDescriptors = drop.dstSet->getDescriptors(descriptorType, drop.binding); auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding); if (dstDescriptors) diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 697f7db7c5..3f00a80cfb 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -444,18 +444,22 @@ bool ILogicalDevice::updateDescriptorSets(const std::span dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) +bool ILogicalDevice::nullifyDescriptors(const std::span dropDescriptors) { for (const auto& drop : dropDescriptors) { auto ds = drop.dstSet; if (!ds || !ds->wasCreatedBy(this)) return false; + } - ds->dropDescriptors(drop, descriptorType); + for (const auto& drop : dropDescriptors) + { + auto ds = drop.dstSet; + ds->dropDescriptors(drop); } - nullifyDescriptors_impl(dropDescriptors, descriptorType); + nullifyDescriptors_impl(dropDescriptors); return true; } From 6b5630d986689a566a1eed3ab6d9749f69c901ab Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 12 Mar 2024 23:54:13 -0300 Subject: [PATCH 28/29] Fix tabs & spaces --- src/nbl/video/CVulkanLogicalDevice.cpp | 54 +++++++++++++------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index b7eb7110c2..ff5febb4b1 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -745,18 +745,18 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::spandescriptorCount = write.count; switch (asset::IDescriptor::GetTypeCategory(descriptorType)) { - case asset::IDescriptor::EC_BUFFER: - outWrite->pBufferInfo = reinterpret_cast(nullDescriptors.data()); - break; - case asset::IDescriptor::EC_IMAGE: - outWrite->pImageInfo = reinterpret_cast(nullDescriptors.data()); - break; - case asset::IDescriptor::EC_BUFFER_VIEW: - outWrite->pTexelBufferView = reinterpret_cast(nullDescriptors.data()); - break; - case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: - outWriteAS->accelerationStructureCount = write.count; - outWriteAS->pAccelerationStructures = reinterpret_cast(nullDescriptors.data()); - break; - default: - assert(!"Invalid code path."); + case asset::IDescriptor::EC_BUFFER: + outWrite->pBufferInfo = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_IMAGE: + outWrite->pImageInfo = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_BUFFER_VIEW: + outWrite->pTexelBufferView = reinterpret_cast(nullDescriptors.data()); + break; + case asset::IDescriptor::EC_ACCELERATION_STRUCTURE: + outWriteAS->accelerationStructureCount = write.count; + outWriteAS->pAccelerationStructures = reinterpret_cast(nullDescriptors.data()); + break; + default: + assert(!"Invalid code path."); } outWrite++; } From aca4c74271b56a133aee9bcd7435fe799ef90d17 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 13 Mar 2024 00:00:51 -0300 Subject: [PATCH 29/29] More PR reviews --- src/nbl/video/CVulkanLogicalDevice.cpp | 8 ++++++-- src/nbl/video/ILogicalDevice.cpp | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index ff5febb4b1..847885da02 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -635,12 +635,15 @@ core::smart_refctd_ptr CVulkanLogicalDevice::createDescriptorPo return nullptr; } +// a lot of empirical research went into defining this constant +constexpr uint32_t MaxDescriptorSetAsWrites = 69u; + void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) { // Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of // VkWriteDescriptorSetAccelerationStructureKHR, VkWriteDescriptorSetAccelerationStructureNV, or VkWriteDescriptorSetInlineUniformBlockEXT core::vector vk_writeDescriptorSets(params.writes.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr}); - core::vector vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr}); + core::vector vk_writeDescriptorSetAS(MaxDescriptorSetAsWrites,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr}); core::vector vk_bufferInfos(params.bufferCount); core::vector vk_imageInfos(params.imageCount); @@ -735,7 +738,7 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span vk_writeDescriptorSets(drops.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr}); - core::vector vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr}); + core::vector vk_writeDescriptorSetAS(MaxDescriptorSetAsWrites,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr}); size_t maxSize = 0; for (auto i = 0; i < drops.size(); i++) @@ -791,6 +794,7 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::spanaccelerationStructureCount = write.count; outWriteAS->pAccelerationStructures = reinterpret_cast(nullDescriptors.data()); + outWrite->pNext = outWriteAS++; break; default: assert(!"Invalid code path."); diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index 3f00a80cfb..2c852c7631 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -451,6 +451,9 @@ bool ILogicalDevice::nullifyDescriptors(const std::spanwasCreatedBy(this)) return false; + // (no binding) + if (ds->getBindingType(drop.binding) == asset::IDescriptor::E_TYPE::ET_COUNT) + return false; } for (const auto& drop : dropDescriptors)