Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sub-allocated descriptor sets #657

Merged
merged 30 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
cc54740
Add sub-allocated descriptor set header
deprilula28 Feb 19, 2024
d10059f
Add some reusable binding API
deprilula28 Feb 19, 2024
347ff63
Work on using descriptor set layout directly
deprilula28 Feb 20, 2024
e1282c7
Remove out old bindings
deprilula28 Feb 20, 2024
28611be
Use pool address allocator
deprilula28 Feb 21, 2024
6c6046c
Use map
deprilula28 Feb 21, 2024
190067a
PR reviews
deprilula28 Feb 21, 2024
289e424
PR reviews
deprilula28 Feb 21, 2024
e0e91ff
Work on deferred freeing the descriptors
deprilula28 Feb 26, 2024
68582ea
Work on having descriptor set match with its sub allocator
deprilula28 Feb 26, 2024
d716848
PR reviews
deprilula28 Feb 27, 2024
4fd4b8f
Fix example
deprilula28 Feb 27, 2024
58c4e90
Add writing of descriptors on the allocate method
deprilula28 Feb 27, 2024
41b9a5b
Work on try allocate and timings
deprilula28 Feb 27, 2024
d608e78
Add PR comments
deprilula28 Feb 28, 2024
b326ca7
Work on nullifying descriptors
deprilula28 Feb 29, 2024
894a47c
Keep descriptor writes outside the allocate function
deprilula28 Mar 4, 2024
59c65ae
Include exporting of allocate descriptor writes instead of using them
deprilula28 Mar 4, 2024
1228f5f
Merge branch 'vulkan_1_3' into suballocdescriptorset
deprilula28 Mar 4, 2024
54250a6
Update examples submodule
deprilula28 Mar 4, 2024
2c35289
Update SubAllocatedDescriptorSet.h
devshgraphicsprogramming Mar 5, 2024
a2e2be4
Forgot that the nullification needs to be done between event-wait and…
devshgraphicsprogramming Mar 5, 2024
bc5b22d
PR review and fix compilation errors
deprilula28 Mar 8, 2024
912ed7a
PR reviews & nullifying descriptors
deprilula28 Mar 11, 2024
89e6440
Fix multi timeline functionality
deprilula28 Mar 12, 2024
ede586f
Update example
deprilula28 Mar 12, 2024
5531903
Implement depletion of sub alloc descriptor set
deprilula28 Mar 12, 2024
79c3a23
Fix API for nullify
deprilula28 Mar 13, 2024
6b5630d
Fix tabs & spaces
deprilula28 Mar 13, 2024
aca4c74
More PR reviews
deprilula28 Mar 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/nbl/asset/IDescriptorSetLayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ class IDescriptorSetLayout : public virtual core::IReferenceCounted // TODO: tr
return m_stageFlags[index.data];
}

// Returns the creation flags stored for the given storage range index.
// `index.data` has to be smaller than `m_count`; this is only checked by the assert.
inline core::bitflag<typename SBinding::E_CREATE_FLAGS> getCreateFlags(const storage_range_index_t index) const
{
	const auto slot = index.data;
	assert(slot < m_count);
	return m_createFlags[slot];
}

inline uint32_t getCount(const storage_range_index_t index) const
{
assert(index.data < m_count);
Expand Down
22 changes: 21 additions & 1 deletion include/nbl/core/alloc/address_allocator_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,18 @@ namespace nbl::core
}
}

// Allocates addresses one element at a time through `alloc.alloc_addr`, using one shared `alignment` for all of them.
// Only the slots of `outAddresses` that currently hold `AddressAlloc::invalid_address` are allocated,
// every other slot is left untouched. When `hint` is null, 0 is passed as the hint of each allocation.
static inline void multi_alloc_addr(AddressAlloc& alloc, uint32_t count, size_type* outAddresses, const size_type* bytes,
	const size_type alignment, const size_type* hint=nullptr) noexcept
{
	for (uint32_t slot=0u; slot!=count; ++slot)
	{
		auto& address = outAddresses[slot];
		// slots that already hold an address are skipped
		if (address==AddressAlloc::invalid_address)
			address = alloc.alloc_addr(bytes[slot],alignment,hint ? hint[slot]:0ull);
	}
}

static inline void multi_free_addr(AddressAlloc& alloc, uint32_t count, const size_type* addr, const size_type* bytes) noexcept
{
for (uint32_t i=0; i<count; i++)
Expand Down Expand Up @@ -186,6 +198,14 @@ namespace nbl::core
alloc,std::min(count-i,maxMultiOps),outAddresses+i,bytes+i,alignment+i,hint ? (hint+i):nullptr);
}

// Shared-alignment variant of the multi allocation: the request is cut into batches of at most `maxMultiOps`
// elements and each batch is forwarded to the `address_allocator_traits_base` implementation.
// `outAddresses`, `bytes` and (when non-null) `hint` are advanced by the batch offset, `alignment` is the same for every batch.
static inline void multi_alloc_addr(AddressAlloc& alloc, uint32_t count, size_type* outAddresses,
	const size_type* bytes, const size_type alignment, const size_type* hint=nullptr) noexcept
{
	using base_t = impl::address_allocator_traits_base<AddressAlloc,has_func_multi_alloc_addr<AddressAlloc>::value>;
	for (uint32_t first=0; first<count; first+=maxMultiOps)
	{
		const auto batchSize = std::min(count-first,maxMultiOps);
		const size_type* const batchHint = hint ? (hint+first):nullptr;
		base_t::multi_alloc_addr(alloc,batchSize,outAddresses+first,bytes+first,alignment,batchHint);
	}
}

static inline void multi_free_addr(AddressAlloc& alloc, uint32_t count, const size_type* addr, const size_type* bytes) noexcept
{
for (uint32_t i=0; i<count; i+=maxMultiOps)
Expand Down Expand Up @@ -244,4 +264,4 @@ namespace nbl::core

}

#endif
#endif
140 changes: 140 additions & 0 deletions include/nbl/video/alloc/SubAllocatedDescriptorSet.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

// Include guard: the macro defined here must be spelled exactly like the one tested by the #ifndef,
// otherwise the guard never triggers and a second inclusion redefines everything in this header.
#ifndef _NBL_VIDEO_SUB_ALLOCATED_DESCRIPTOR_SET_H_
#define _NBL_VIDEO_SUB_ALLOCATED_DESCRIPTOR_SET_H_

#include "nbl/video/alloc/IBufferAllocator.h"

#include <type_traits>

namespace nbl::video
{

class SubAllocatedDescriptorSet : public core::IReferenceCounted
{
public:
// address allocator gives offsets
// reserved allocator allocates memory to keep the address allocator state inside
using AddressAllocator = core::PoolAddressAllocator<uint32_t>;
using ReservedAllocator = core::allocator<uint8_t>;
using size_type = typename AddressAllocator::size_type;
using value_type = typename AddressAllocator::size_type;
static constexpr value_type invalid_value = AddressAllocator::invalid_address;

protected:
struct SubAllocDescriptorSetRange {
std::shared_ptr<AddressAllocator> addressAllocator;
std::shared_ptr<ReservedAllocator> reservedAllocator;
devshgraphicsprogramming marked this conversation as resolved.
Show resolved Hide resolved
size_t reservedSize;
};
std::map<uint32_t, SubAllocDescriptorSetRange> m_allocatableRanges = {};

devshgraphicsprogramming marked this conversation as resolved.
Show resolved Hide resolved
#ifdef _NBL_DEBUG
std::recursive_mutex stAccessVerfier;
#endif // _NBL_DEBUG

constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head
constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u;
devshgraphicsprogramming marked this conversation as resolved.
Show resolved Hide resolved

public:
// constructors
template<typename... Args>
inline SubAllocatedDescriptorSet(video::IGPUDescriptorSetLayout* layout)
{
for (uint32_t descriptorType = 0; descriptorType < static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); descriptorType++)
{
auto descType = static_cast<asset::IDescriptor::E_TYPE>(descriptorType);
auto& redirect = layout->getDescriptorRedirect(descType);

for (uint32_t i = 0; i < redirect.getBindingCount(); i++)
{
auto binding = redirect.getBinding(i);
auto storageIndex = redirect.findBindingStorageIndex(binding);

auto count = redirect.getCount(storageIndex);
auto flags = redirect.getCreateFlags(storageIndex);

// Only bindings with these flags will be allocatable
if (flags.hasFlags(core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT)
| IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT
| IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT))
{
SubAllocDescriptorSetRange range;
range.reservedSize = AddressAllocator::reserved_size(MaxDescriptorSetAllocationAlignment, static_cast<size_type>(count), MinDescriptorSetAllocationSize);
range.reservedAllocator = std::shared_ptr<ReservedAllocator>(new ReservedAllocator());
range.addressAllocator = std::shared_ptr<AddressAllocator>(new AddressAllocator(
range.reservedAllocator->allocate(range.reservedSize, _NBL_SIMD_ALIGNMENT),
static_cast<size_type>(0), 0u, MaxDescriptorSetAllocationAlignment, static_cast<size_type>(count),
MinDescriptorSetAllocationSize
));
m_allocatableRanges.emplace(binding.data, range);
}
}
}
}

~SubAllocatedDescriptorSet()
{
for (uint32_t i = 0; i < m_allocatableRanges.size(); i++)
{
auto& range = m_allocatableRanges[i];
if (range.reservedSize == 0)
continue;
auto ptr = reinterpret_cast<const uint8_t*>(core::address_allocator_traits<AddressAllocator>::getReservedSpacePtr(*range.addressAllocator));
range.addressAllocator->~PoolAddressAllocator();
devshgraphicsprogramming marked this conversation as resolved.
Show resolved Hide resolved
range.reservedAllocator->deallocate(const_cast<uint8_t*>(ptr), range.reservedSize);
devshgraphicsprogramming marked this conversation as resolved.
Show resolved Hide resolved
}
}

// whether that binding index can be sub-allocated
bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); }

AddressAllocator* getBindingAllocator(uint32_t binding)
{
auto range = m_allocatableRanges.find(binding);
assert(range != m_allocatableRanges.end());// Check if this binding has an allocator
devshgraphicsprogramming marked this conversation as resolved.
Show resolved Hide resolved
return range->second.addressAllocator.get();
}

// main methods
devshgraphicsprogramming marked this conversation as resolved.
Show resolved Hide resolved

//! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value`
inline void multi_allocate(uint32_t binding, uint32_t count, value_type* outAddresses)
{
#ifdef _NBL_DEBUG
std::unique_lock<std::recursive_mutex> tLock(stAccessVerfier,std::try_to_lock_t());
assert(tLock.owns_lock());
#endif // _NBL_DEBUG

auto allocator = getBindingAllocator(binding);
for (uint32_t i=0; i<count; i++)
{
if (outAddresses[i]!=AddressAllocator::invalid_address)
continue;

outAddresses[i] = allocator->alloc_addr(1,1);
}
}
inline void multi_deallocate(uint32_t binding, uint32_t count, const size_type* addr)
{
#ifdef _NBL_DEBUG
std::unique_lock<std::recursive_mutex> tLock(stAccessVerfier,std::try_to_lock_t());
assert(tLock.owns_lock());
#endif // _NBL_DEBUG

auto allocator = getBindingAllocator(binding);
for (uint32_t i=0; i<count; i++)
{
if (addr[i]==AddressAllocator::invalid_address)
continue;

allocator->free_addr(addr[i],1);
}
}
};

}

#endif