From c64f295e8133d0a7241b526e3ee9e79eb4f4f147 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Fri, 16 Feb 2024 15:56:28 -0300 Subject: [PATCH 01/12] Add sub-allocated descriptor set example --- 67_SubAllocatedDescriptorSet/CMakeLists.txt | 24 +++ .../app_resources/common.hlsl | 20 ++ .../app_resources/shader.comp.hlsl | 33 +++ .../config.json.template | 28 +++ 67_SubAllocatedDescriptorSet/main.cpp | 191 ++++++++++++++++++ 67_SubAllocatedDescriptorSet/pipeline.groovy | 50 +++++ CMakeLists.txt | 1 + 7 files changed, 347 insertions(+) create mode 100644 67_SubAllocatedDescriptorSet/CMakeLists.txt create mode 100644 67_SubAllocatedDescriptorSet/app_resources/common.hlsl create mode 100644 67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl create mode 100644 67_SubAllocatedDescriptorSet/config.json.template create mode 100644 67_SubAllocatedDescriptorSet/main.cpp create mode 100644 67_SubAllocatedDescriptorSet/pipeline.groovy diff --git a/67_SubAllocatedDescriptorSet/CMakeLists.txt b/67_SubAllocatedDescriptorSet/CMakeLists.txt new file mode 100644 index 000000000..bc1624875 --- /dev/null +++ b/67_SubAllocatedDescriptorSet/CMakeLists.txt @@ -0,0 +1,24 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/67_SubAllocatedDescriptorSet/app_resources/common.hlsl b/67_SubAllocatedDescriptorSet/app_resources/common.hlsl new file mode 100644 index 000000000..456dc6740 --- /dev/null +++ b/67_SubAllocatedDescriptorSet/app_resources/common.hlsl @@ -0,0 +1,20 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +// Unfortunately not every piece of C++14 metaprogramming syntax is available in HLSL 202x +// https://github.com/microsoft/DirectXShaderCompiler/issues/5751#issuecomment-1800847954 +typedef nbl::hlsl::float32_t3 input_t; +typedef nbl::hlsl::float32_t output_t; + +NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxPossibleElementCount = 1 << 20; + +struct PushConstantData +{ + uint64_t inputAddress; + uint64_t outputAddress; + uint32_t dataElementCount; +}; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 256; + +// Yes we do have our own re-creation of C++'s 
STL in HLSL2021 ! +#include "nbl/builtin/hlsl/limits.hlsl" \ No newline at end of file diff --git a/67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl b/67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl new file mode 100644 index 000000000..4aeef0e0f --- /dev/null +++ b/67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl @@ -0,0 +1,33 @@ +#include "common.hlsl" + +// just a small test +#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" + +[[vk::push_constant]] PushConstantData pushConstants; + +// does absolutely nothing, a later example will show how it gets used +template +void dummyTraitTest() {} + +[numthreads(WorkgroupSize,1,1)] +void main(uint32_t3 ID : SV_DispatchThreadID) +{ + dummyTraitTest(); + if (ID.x>=pushConstants.dataElementCount) + return; + + const input_t self = vk::RawBufferLoad(pushConstants.inputAddress+sizeof(input_t)*ID.x); + + nbl::hlsl::Xoroshiro64StarStar rng = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(pushConstants.dataElementCount,ID.x)^0xdeadbeefu); + + float32_t acc = nbl::hlsl::numeric_limits::max; + const static uint32_t OthersToTest = 15; + [[unroll(OthersToTest)]] + for (uint32_t i=0; i(pushConstants.inputAddress+sizeof(input_t)*offset); + acc = min(length(other-self),acc); + } + vk::RawBufferStore(pushConstants.outputAddress+sizeof(float32_t)*ID.x,acc); +} \ No newline at end of file diff --git a/67_SubAllocatedDescriptorSet/config.json.template b/67_SubAllocatedDescriptorSet/config.json.template new file mode 100644 index 000000000..717d05d53 --- /dev/null +++ b/67_SubAllocatedDescriptorSet/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", // should be none + "platform": "windows", + "buildModes": [], + "runConfiguration": 
"Release", // we also need to run in Debug nad RWDI because foundational example + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp new file mode 100644 index 000000000..fe021929b --- /dev/null +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -0,0 +1,191 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + + +#include "nbl/video/surface/CSurfaceVulkan.h" +#include "nbl/video/alloc/SubAllocatedDescriptorSet.h" + +#include "../common/BasicMultiQueueApplication.hpp" +#include "../common/MonoAssetManagerAndBuiltinResourceApplication.hpp" + +using namespace nbl; +using namespace core; +using namespace system; +using namespace ui; +using namespace asset; +using namespace video; + +#include "app_resources/common.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" + +// In this application we'll cover buffer streaming, Buffer Device Address (BDA) and push constants +class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplication, public examples::MonoAssetManagerAndBuiltinResourceApplication +{ + using device_base_t = examples::MonoDeviceApplication; + using asset_base_t = examples::MonoAssetManagerAndBuiltinResourceApplication; + + // The pool cache is just a formalized way of round-robining command pools and resetting + reusing them after their most recent submit signals finished. + // Its a little more ergonomic to use if you don't have a 1:1 mapping between frames and pools. + smart_refctd_ptr m_poolCache; + + smart_refctd_ptr>> m_subAllocDescriptorSet; + + // This example really lets the advantages of a timeline semaphore shine through! 
+ smart_refctd_ptr m_timeline; + uint64_t m_iteration = 0; + constexpr static inline uint64_t MaxIterations = 200; + + constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head + constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u; + + public: + // Yay thanks to multiple inheritance we cannot forward ctors anymore + SubAllocatedDescriptorSetApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + system::IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {} + + // we stuff all our work here because its a "single shot" app + bool onAppInitialized(smart_refctd_ptr&& system) override + { + using nbl::video::IGPUDescriptorSetLayout; + + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(std::move(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + + // We'll allow subsequent iterations to overlap each other on the GPU, the only limiting factors are + // the amount of memory in the streaming buffers and the number of commandpools we can use simultaenously. 
+ constexpr auto MaxConcurrency = 64; + + // Since this time we don't throw the Command Pools away and we'll reset them instead, we don't create the pools with the transient flag + m_poolCache = ICommandPoolCache::create(core::smart_refctd_ptr(m_device),getComputeQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::NONE,MaxConcurrency); + + // In contrast to fences, we just need one semaphore to rule all dispatches + m_timeline = m_device->createSemaphore(m_iteration); + + // Descriptor set sub allocator + + video::IGPUDescriptorSetLayout::SBinding bindings[1]; + { + bindings[0].binding = 0; + bindings[0].count = 65536u; + bindings[0].createFlags = core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; + bindings[0].type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; + bindings[0].stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE; + } + + std::span bindingsSpan(bindings); + + // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) + auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr>>( + bindings, MaxDescriptorSetAllocationAlignment, MinDescriptorSetAllocationSize + ); + + std::vector allocation, size; + { + for (uint32_t i = 0; i < 512; i++) + { + allocation.push_back(core::GeneralpurposeAddressAllocator::invalid_address); + size.push_back(4); + } + subAllocatedDescriptorSet->multi_allocate(allocation.size(), &allocation[0], &size[0]); + for (uint32_t i = 0; i < allocation.size(); i++) + { + m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); + assert(allocation[i] != core::GeneralpurposeAddressAllocator::invalid_address); + } + } + { + std::vector addr, freeSize; + for (uint32_t i = 0; i < 512; i+=2) + { + addr.push_back(allocation[i]); + 
freeSize.push_back(4); + } + subAllocatedDescriptorSet->multi_deallocate(addr.size(), &addr[0], &freeSize[0]); + } + + m_logger->log("Freed some allocations", system::ILogger::ELL_INFO); + allocation.clear(); + size.clear(); + { + for (uint32_t i = 0; i < 512; i++) + { + allocation.push_back(core::GeneralpurposeAddressAllocator::invalid_address); + size.push_back(2); + } + subAllocatedDescriptorSet->multi_allocate(allocation.size(), &allocation[0], &size[0]); + for (uint32_t i = 0; i < allocation.size(); i++) + { + m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); + assert(allocation[i] != core::GeneralpurposeAddressAllocator::invalid_address); + } + } + + return true; + } + + // Ok this time we'll actually have a work loop (maybe just for the sake of future WASM so we don't timeout a Browser Tab with an unresponsive script) + bool keepRunning() override { return m_iterationacquirePool(); + } while (poolIx==ICommandPoolCache::invalid_index); + + smart_refctd_ptr cmdbuf; + { + m_poolCache->getPool(poolIx)->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1},core::smart_refctd_ptr(m_logger)); + // lets record, its still a one time submit because we have to re-record with different push constants each time + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // COMMAND RECORDING + + auto result = cmdbuf->end(); + assert(result); + } + + + const auto savedIterNum = m_iteration++; + { + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = + { + .cmdbuf = cmdbuf.get() + }; + const IQueue::SSubmitInfo::SSemaphoreInfo signalInfo = + { + .semaphore = m_timeline.get(), + .value = m_iteration, + .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + }; + // Generally speaking we don't need to wait on any semaphore because in this example every dispatch gets its own clean piece of memory to use + // from the point of view of the GPU. 
Implicit domain operations between Host and Device happen upon a submit and a semaphore/fence signal operation, + // this ensures we can touch the input and get accurate values from the output memory using the CPU before and after respectively, each submit becoming PENDING. + // If we actually cared about this submit seeing the memory accesses of a previous dispatch we could add a semaphore wait + const IQueue::SSubmitInfo submitInfo = { + .waitSemaphores = {}, + .commandBuffers = {&cmdbufInfo,1}, + .signalSemaphores = {&signalInfo,1} + }; + + queue->startCapture(); + auto statusCode = queue->submit({ &submitInfo,1 }); + queue->endCapture(); + assert(statusCode == IQueue::RESULT::SUCCESS); + } + } +}; + +NBL_MAIN_FUNC(SubAllocatedDescriptorSetApp) \ No newline at end of file diff --git a/67_SubAllocatedDescriptorSet/pipeline.groovy b/67_SubAllocatedDescriptorSet/pipeline.groovy new file mode 100644 index 000000000..1a7b043a4 --- /dev/null +++ b/67_SubAllocatedDescriptorSet/pipeline.groovy @@ -0,0 +1,50 @@ +import org.DevshGraphicsProgramming.Agent +import org.DevshGraphicsProgramming.BuilderInfo +import org.DevshGraphicsProgramming.IBuilder + +class CStreamingAndBufferDeviceAddressBuilder extends IBuilder +{ + public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info) + { + super(_agent, _info) + } + + @Override + public boolean prepare(Map axisMapping) + { + return true + } + + @Override + public boolean build(Map axisMapping) + { + IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") + IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") + + def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) + def nameOfConfig = getNameOfConfig(config) + + agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") + + return true + } + + @Override + public boolean test(Map axisMapping) + { + return true + } + + 
@Override + public boolean install(Map axisMapping) + { + return true + } +} + +def create(Agent _agent, _info) +{ + return new CStreamingAndBufferDeviceAddressBuilder(_agent, _info) +} + +return this \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a20a33a9..4a9c2b376 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,5 +65,6 @@ if(NBL_BUILD_EXAMPLES) #add_subdirectory(61_UI EXCLUDE_FROM_ALL) add_subdirectory(62_CAD EXCLUDE_FROM_ALL) add_subdirectory(62_SchusslerTest EXCLUDE_FROM_ALL) + add_subdirectory(67_SubAllocatedDescriptorSet EXCLUDE_FROM_ALL) add_subdirectory(0_ImportanceSamplingEnvMaps EXCLUDE_FROM_ALL) #TODO: integrate back into 42 endif() \ No newline at end of file From 5005a4c4038953f782f5e9c0b79f25025769231d Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 16 Feb 2024 14:08:08 +0100 Subject: [PATCH 02/12] Change the way device filtering works --- 08_HelloSwapchain/main.cpp | 33 +++++++++++++++++++------------- common/MonoDeviceApplication.hpp | 7 ++++--- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/08_HelloSwapchain/main.cpp b/08_HelloSwapchain/main.cpp index e6581d525..89f843b18 100644 --- a/08_HelloSwapchain/main.cpp +++ b/08_HelloSwapchain/main.cpp @@ -17,6 +17,10 @@ class WindowedApplication : public virtual BasicMultiQueueApplication public: using base_t::base_t; + // We inherit from an application that tries to find Graphics and Compute queues + // because applications with presentable images often want to perform Graphics family operations + virtual bool isComputeOnly() const {return false;} + virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override { auto retval = base_t::getAPIFeaturesToEnable(); @@ -26,22 +30,23 @@ class WindowedApplication : public virtual BasicMultiQueueApplication } // New function, we neeed to know about surfaces to create ahead of time - virtual core::vector getSurfaces() const = 0; + virtual core::vector getSurfaces() const = 0; - virtual core::set 
filterDevices(const core::SRange& physicalDevices) const + // We have a very simple heuristic, the device must be able to render to all windows! + // (want to make something more complex? you're on your own!) + virtual void filterDevices(core::set& physicalDevices) const { - const auto firstFilter = base_t::filterDevices(physicalDevices); + base_t::filterDevices(physicalDevices); video::SPhysicalDeviceFilter deviceFilter = {}; - const auto surfaces = getSurfaces(); - deviceFilter.requiredSurfaceCompatibilities = surfaces.data(); - deviceFilter.requiredSurfaceCompatibilitiesCount = surfaces.size(); + auto surfaces = getSurfaces(); + deviceFilter.requiredSurfaceCompatibilities = {surfaces}; return deviceFilter(physicalDevices); } - virtual bool onAppInitialized(smart_refctd_ptr&& system) override + virtual bool onAppInitialized(core::smart_refctd_ptr&& system) override { // Remember to call the base class initialization! if (!base_t::onAppInitialized(std::move(system))) @@ -52,6 +57,7 @@ class WindowedApplication : public virtual BasicMultiQueueApplication #else #error "Unimplemented!" #endif + return true; } core::smart_refctd_ptr m_winMgr; @@ -87,7 +93,7 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication public: using base_t::base_t; - virtual bool onAppInitialized(smart_refctd_ptr&& system) override + virtual bool onAppInitialized(core::smart_refctd_ptr&& system) override { // Remember to call the base class initialization! 
if (!base_t::onAppInitialized(std::move(system))) @@ -98,7 +104,7 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication return true; } - virtual core::vector getSurfaces() const + virtual core::vector getSurfaces() const { return {{m_surface.get()/*,EQF_NONE*/}}; } @@ -112,15 +118,15 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication } protected: - virtual IWindow::SCreationParams getWindowCreationParams() const + virtual ui::IWindow::SCreationParams getWindowCreationParams() const { - IWindow::SCreationParams params = {}; - params.callback = make_smart_refctd_ptr(); + ui::IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); params.width = 640; params.height = 480; params.x = 32; params.y = 32; - params.flags = IWindow::ECF_NONE; + params.flags = ui::IWindow::ECF_NONE; params.windowCaption = "SingleNonResizableWindowApplication"; return params; } @@ -130,6 +136,7 @@ class SingleNonResizableWindowApplication : public virtual WindowedApplication }; } +#include "nbl/video/CVulkanSwapchain.h" using namespace nbl; using namespace core; diff --git a/common/MonoDeviceApplication.hpp b/common/MonoDeviceApplication.hpp index ca4e6d449..64728d892 100644 --- a/common/MonoDeviceApplication.hpp +++ b/common/MonoDeviceApplication.hpp @@ -40,7 +40,8 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication if (gpus.empty()) return logFail("Failed to find any Nabla Core Profile Vulkan devices!"); - const core::set suitablePhysicalDevices = filterDevices(gpus); + core::set suitablePhysicalDevices(gpus.begin(),gpus.end()); + filterDevices(suitablePhysicalDevices); if (suitablePhysicalDevices.empty()) return logFail("No PhysicalDevice met the feature requirements of the application!"); @@ -78,7 +79,7 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication } // a device filter helps you create a set of physical devices that satisfy your 
requirements in terms of features, limits etc. - virtual core::set filterDevices(const core::SRange& physicalDevices) const + virtual void filterDevices(core::set& physicalDevices) const { video::SPhysicalDeviceFilter deviceFilter = {}; @@ -96,7 +97,7 @@ class MonoDeviceApplication : public virtual MonoSystemMonoLoggerApplication const auto queueReqs = getQueueRequirements(); deviceFilter.queueRequirements = queueReqs; - return deviceFilter(physicalDevices); + deviceFilter(physicalDevices); } // virtual function so you can override as needed for some example father down the line From f18077bc181132afe687b3d3fec3783c3c272a4b Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 19 Feb 2024 16:53:39 -0300 Subject: [PATCH 03/12] Update example to match changes --- .../app_resources/common.hlsl | 20 ----------- .../app_resources/shader.comp.hlsl | 33 ------------------- 67_SubAllocatedDescriptorSet/main.cpp | 11 +++---- 3 files changed, 5 insertions(+), 59 deletions(-) delete mode 100644 67_SubAllocatedDescriptorSet/app_resources/common.hlsl delete mode 100644 67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl diff --git a/67_SubAllocatedDescriptorSet/app_resources/common.hlsl b/67_SubAllocatedDescriptorSet/app_resources/common.hlsl deleted file mode 100644 index 456dc6740..000000000 --- a/67_SubAllocatedDescriptorSet/app_resources/common.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" - -// Unfortunately not every piece of C++14 metaprogramming syntax is available in HLSL 202x -// https://github.com/microsoft/DirectXShaderCompiler/issues/5751#issuecomment-1800847954 -typedef nbl::hlsl::float32_t3 input_t; -typedef nbl::hlsl::float32_t output_t; - -NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxPossibleElementCount = 1 << 20; - -struct PushConstantData -{ - uint64_t inputAddress; - uint64_t outputAddress; - uint32_t dataElementCount; -}; - -NBL_CONSTEXPR uint32_t WorkgroupSize = 256; - -// Yes we do have our own re-creation of C++'s 
STL in HLSL2021 ! -#include "nbl/builtin/hlsl/limits.hlsl" \ No newline at end of file diff --git a/67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl b/67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl deleted file mode 100644 index 4aeef0e0f..000000000 --- a/67_SubAllocatedDescriptorSet/app_resources/shader.comp.hlsl +++ /dev/null @@ -1,33 +0,0 @@ -#include "common.hlsl" - -// just a small test -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - -[[vk::push_constant]] PushConstantData pushConstants; - -// does absolutely nothing, a later example will show how it gets used -template -void dummyTraitTest() {} - -[numthreads(WorkgroupSize,1,1)] -void main(uint32_t3 ID : SV_DispatchThreadID) -{ - dummyTraitTest(); - if (ID.x>=pushConstants.dataElementCount) - return; - - const input_t self = vk::RawBufferLoad(pushConstants.inputAddress+sizeof(input_t)*ID.x); - - nbl::hlsl::Xoroshiro64StarStar rng = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(pushConstants.dataElementCount,ID.x)^0xdeadbeefu); - - float32_t acc = nbl::hlsl::numeric_limits::max; - const static uint32_t OthersToTest = 15; - [[unroll(OthersToTest)]] - for (uint32_t i=0; i(pushConstants.inputAddress+sizeof(input_t)*offset); - acc = min(length(other-self),acc); - } - vk::RawBufferStore(pushConstants.outputAddress+sizeof(float32_t)*ID.x,acc); -} \ No newline at end of file diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index fe021929b..398c842dc 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -16,7 +16,6 @@ using namespace ui; using namespace asset; using namespace video; -#include "app_resources/common.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" // In this application we'll cover buffer streaming, Buffer Device Address (BDA) and push constants @@ -29,7 +28,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio // Its a little more ergonomic to use if 
you don't have a 1:1 mapping between frames and pools. smart_refctd_ptr m_poolCache; - smart_refctd_ptr>> m_subAllocDescriptorSet; + smart_refctd_ptr m_subAllocDescriptorSet; // This example really lets the advantages of a timeline semaphore shine through! smart_refctd_ptr m_timeline; @@ -82,7 +81,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio std::span bindingsSpan(bindings); // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) - auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr>>( + auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr( bindings, MaxDescriptorSetAllocationAlignment, MinDescriptorSetAllocationSize ); @@ -93,7 +92,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio allocation.push_back(core::GeneralpurposeAddressAllocator::invalid_address); size.push_back(4); } - subAllocatedDescriptorSet->multi_allocate(allocation.size(), &allocation[0], &size[0]); + subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), &allocation[0], &size[0]); for (uint32_t i = 0; i < allocation.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); @@ -107,7 +106,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio addr.push_back(allocation[i]); freeSize.push_back(4); } - subAllocatedDescriptorSet->multi_deallocate(addr.size(), &addr[0], &freeSize[0]); + subAllocatedDescriptorSet->multi_deallocate(0, addr.size(), &addr[0], &freeSize[0]); } m_logger->log("Freed some allocations", system::ILogger::ELL_INFO); @@ -119,7 +118,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio allocation.push_back(core::GeneralpurposeAddressAllocator::invalid_address); size.push_back(2); } - subAllocatedDescriptorSet->multi_allocate(allocation.size(), &allocation[0], &size[0]); + subAllocatedDescriptorSet->multi_allocate(0, 
allocation.size(), &allocation[0], &size[0]); for (uint32_t i = 0; i < allocation.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); From 8dee3637262203a9a5e10b58e77f62d746133bf8 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 21 Feb 2024 08:54:57 -0300 Subject: [PATCH 04/12] Conform to API change --- 67_SubAllocatedDescriptorSet/main.cpp | 61 ++++++++++++--------------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index 398c842dc..b1c22dcbd 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -67,62 +67,55 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio // Descriptor set sub allocator - video::IGPUDescriptorSetLayout::SBinding bindings[1]; + video::IGPUDescriptorSetLayout::SBinding bindings[12]; { - bindings[0].binding = 0; - bindings[0].count = 65536u; - bindings[0].createFlags = core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) - | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT - | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; - bindings[0].type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; - bindings[0].stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE; + for (uint32_t i = 0; i < 12; i++) + { + bindings[i].binding = i; + bindings[i].count = 16000; + bindings[i].createFlags = core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT + | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; + if (i % 2 == 0) bindings[i].type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; + else if (i % 2 == 1) bindings[i].type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; + 
bindings[i].stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE; + } } std::span bindingsSpan(bindings); + auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr( - bindings, MaxDescriptorSetAllocationAlignment, MinDescriptorSetAllocationSize + descriptorSetLayout.get(), MaxDescriptorSetAllocationAlignment, MinDescriptorSetAllocationSize ); - std::vector allocation, size; + std::vector allocation(128, core::PoolAddressAllocator::invalid_address); { - for (uint32_t i = 0; i < 512; i++) - { - allocation.push_back(core::GeneralpurposeAddressAllocator::invalid_address); - size.push_back(4); - } - subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), &allocation[0], &size[0]); + subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), &allocation[0]); for (uint32_t i = 0; i < allocation.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); - assert(allocation[i] != core::GeneralpurposeAddressAllocator::invalid_address); + assert(allocation[i] != core::PoolAddressAllocator::invalid_address); } } { - std::vector addr, freeSize; - for (uint32_t i = 0; i < 512; i+=2) + std::vector addr; + for (uint32_t i = 0; i < allocation.size(); i+=2) { addr.push_back(allocation[i]); - freeSize.push_back(4); } - subAllocatedDescriptorSet->multi_deallocate(0, addr.size(), &addr[0], &freeSize[0]); + subAllocatedDescriptorSet->multi_deallocate(0, addr.size(), &addr[0]); } - - m_logger->log("Freed some allocations", system::ILogger::ELL_INFO); - allocation.clear(); - size.clear(); + m_logger->log("freed half the descriptors", system::ILogger::ELL_INFO); + std::vector allocation2(128, core::PoolAddressAllocator::invalid_address); { - for (uint32_t i = 0; i < 512; i++) + subAllocatedDescriptorSet->multi_allocate(0, 
allocation2.size(), &allocation2[0]); + for (uint32_t i = 0; i < allocation2.size(); i++) { - allocation.push_back(core::GeneralpurposeAddressAllocator::invalid_address); - size.push_back(2); - } - subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), &allocation[0], &size[0]); - for (uint32_t i = 0; i < allocation.size(); i++) - { - m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); - assert(allocation[i] != core::GeneralpurposeAddressAllocator::invalid_address); + m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation2[i]); + assert(allocation2[i] != core::PoolAddressAllocator::invalid_address); } } From 0b805e0fbf13ebc85e7aafb28cc49fdad741932f Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Wed, 21 Feb 2024 13:50:27 -0300 Subject: [PATCH 05/12] PR reviews --- 67_SubAllocatedDescriptorSet/main.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index b1c22dcbd..a1587ef5f 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -35,9 +35,6 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio uint64_t m_iteration = 0; constexpr static inline uint64_t MaxIterations = 200; - constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head - constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u; - public: // Yay thanks to multiple inheritance we cannot forward ctors anymore SubAllocatedDescriptorSetApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : @@ -72,7 +69,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio for (uint32_t i = 0; i < 12; i++) { bindings[i].binding = i; - bindings[i].count = 16000; + bindings[i].count = 512; 
bindings[i].createFlags = core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; @@ -87,10 +84,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) - auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr( - descriptorSetLayout.get(), MaxDescriptorSetAllocationAlignment, MinDescriptorSetAllocationSize - ); - + auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr(descriptorSetLayout.get()); std::vector allocation(128, core::PoolAddressAllocator::invalid_address); { subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), &allocation[0]); From 461a7dae01c3c02687706fd7a0a9a20c70acdf10 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 27 Feb 2024 17:40:11 -0300 Subject: [PATCH 06/12] Fix example --- 67_SubAllocatedDescriptorSet/main.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index a1587ef5f..e11faa1e5 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -83,8 +83,19 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + video::IDescriptorPool::SCreateInfo poolParams = {}; + { + poolParams.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE)] = 512 * 6; + poolParams.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 512 * 6; + poolParams.maxSets = 1; + poolParams.flags = 
core::bitflag(video::IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT); + } + + auto descriptorPool = m_device->createDescriptorPool(std::move(poolParams)); + auto descriptorSet = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(descriptorSetLayout)); + // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) - auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr(descriptorSetLayout.get()); + auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr(core::smart_refctd_ptr(descriptorSet)); std::vector allocation(128, core::PoolAddressAllocator::invalid_address); { subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), &allocation[0]); From 5a94b7ef784f1aa81905cc4a05aab2adc64576ed Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 4 Mar 2024 19:16:34 -0300 Subject: [PATCH 07/12] Fix example with changes --- 67_SubAllocatedDescriptorSet/main.cpp | 54 +++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index e11faa1e5..93c7c486d 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -94,11 +94,57 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio auto descriptorPool = m_device->createDescriptorPool(std::move(poolParams)); auto descriptorSet = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(descriptorSetLayout)); + + auto createImageDescriptor = [&](uint32_t width, uint32_t height) + { + auto image = m_device->createImage(nbl::video::IGPUImage::SCreationParams { + { + .type = nbl::video::IGPUImage::E_TYPE::ET_2D, + .samples = nbl::video::IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .format = nbl::asset::E_FORMAT::EF_R8G8B8A8_UNORM, + .extent = { width, height, 1 }, + .mipLevels = 1, + .arrayLayers = 1, + .usage = nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT 
+ | nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT + | nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT, + }, {}, nbl::video::IGPUImage::TILING::LINEAR, + }); + + auto reqs = image->getMemoryReqs(); + reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(reqs, image.get()); + + auto imageView = m_device->createImageView(nbl::video::IGPUImageView::SCreationParams { + .image = image, + .viewType = nbl::video::IGPUImageView::E_TYPE::ET_2D, + .format = nbl::asset::E_FORMAT::EF_R8G8B8A8_UNORM, + // .subresourceRange = { nbl::video::IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, 0, 1, 0, 1 }, + }); + + video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo = {}; + descriptorInfo.desc = imageView; + descriptorInfo.info.image.imageLayout = asset::IImage::LAYOUT::GENERAL; + + return descriptorInfo; + }; + // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) - auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr(core::smart_refctd_ptr(descriptorSet)); + auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr(core::smart_refctd_ptr(descriptorSet), core::smart_refctd_ptr(m_device)); std::vector allocation(128, core::PoolAddressAllocator::invalid_address); + std::vector descriptors; + std::vector descriptorWrites(allocation.size(), video::IGPUDescriptorSet::SWriteDescriptorSet{}); + + for (uint32_t i = 0; i < allocation.size(); i++) + { + auto descriptorInfo = createImageDescriptor(80, 80); + descriptors.push_back(descriptorInfo); + } + { - subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), &allocation[0]); + auto allocNum = subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), descriptors.data(), descriptorWrites.data(), allocation.data()); + assert(allocNum == 0); + m_device->updateDescriptorSets(descriptorWrites, {}); for (uint32_t i = 0; i < allocation.size(); i++) { m_logger->log("allocation[%d]: %d", 
system::ILogger::ELL_INFO, i, allocation[i]); @@ -116,7 +162,9 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio m_logger->log("freed half the descriptors", system::ILogger::ELL_INFO); std::vector allocation2(128, core::PoolAddressAllocator::invalid_address); { - subAllocatedDescriptorSet->multi_allocate(0, allocation2.size(), &allocation2[0]); + auto allocNum = subAllocatedDescriptorSet->multi_allocate(0, allocation2.size(), descriptors.data(), descriptorWrites.data(), &allocation2[0]); + assert(allocNum == 0); + m_device->updateDescriptorSets(descriptorWrites, {}); for (uint32_t i = 0; i < allocation2.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation2[i]); From 2d9181d1879edf93bc9518e6d0ff4d0bce6f27f7 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 4 Mar 2024 19:31:33 -0300 Subject: [PATCH 08/12] Fix conflict --- 08_HelloSwapchain/main.cpp | 135 ------------------------------------- 1 file changed, 135 deletions(-) diff --git a/08_HelloSwapchain/main.cpp b/08_HelloSwapchain/main.cpp index 0f28e2097..9585f4121 100644 --- a/08_HelloSwapchain/main.cpp +++ b/08_HelloSwapchain/main.cpp @@ -6,141 +6,6 @@ // #include "nbl/video/surface/CSurfaceVulkan.h" -<<<<<<< HEAD - -#include "../common/BasicMultiQueueApplication.hpp" - -namespace nbl::examples -{ -// Virtual Inheritance because apps might end up doing diamond inheritance -class WindowedApplication : public virtual BasicMultiQueueApplication -{ - using base_t = BasicMultiQueueApplication; - - public: - using base_t::base_t; - - // We inherit from an application that tries to find Graphics and Compute queues - // because applications with presentable images often want to perform Graphics family operations - virtual bool isComputeOnly() const {return false;} - - virtual video::IAPIConnection::SFeatures getAPIFeaturesToEnable() override - { - auto retval = base_t::getAPIFeaturesToEnable(); - // We only support one swapchain mode, surface, 
the other one is Display which we have not implemented yet. - retval.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE; - return retval; - } - - // New function, we neeed to know about surfaces to create ahead of time - virtual core::vector getSurfaces() const = 0; - - // We have a very simple heuristic, the device must be able to render to all windows! - // (want to make something more complex? you're on your own!) - virtual void filterDevices(core::set& physicalDevices) const - { - base_t::filterDevices(physicalDevices); - - video::SPhysicalDeviceFilter deviceFilter = {}; - - auto surfaces = getSurfaces(); - deviceFilter.requiredSurfaceCompatibilities = {surfaces}; - - return deviceFilter(physicalDevices); - } - - virtual bool onAppInitialized(core::smart_refctd_ptr&& system) override - { - // Remember to call the base class initialization! - if (!base_t::onAppInitialized(std::move(system))) - return false; - - #ifdef _NBL_PLATFORM_WINDOWS_ - m_winMgr = nbl::ui::IWindowManagerWin32::create(); - #else - #error "Unimplemented!" 
- #endif - return true; - } - - core::smart_refctd_ptr m_winMgr; -}; - - -// Before we get onto creating a window, we need to discuss how Nabla handles input, clipboards and cursor control -class IWindowClosedCallback : public virtual nbl::ui::IWindow::IEventCallback -{ - public: - IWindowClosedCallback() : m_gotWindowClosedMsg(false) {} - - // unless you create a separate callback per window, both will "trip" this condition - bool windowGotClosed() const {return m_gotWindowClosedMsg;} - - private: - bool onWindowClosed_impl() override - { - m_gotWindowClosedMsg = true; - return true; - } - - bool m_gotWindowClosedMsg; -}; - -// We inherit from an application that tries to find Graphics and Compute queues -// because applications with presentable images often want to perform Graphics family operations -// Virtual Inheritance because apps might end up doing diamond inheritance -class SingleNonResizableWindowApplication : public virtual WindowedApplication -{ - using base_t = WindowedApplication; - - public: - using base_t::base_t; - - virtual bool onAppInitialized(core::smart_refctd_ptr&& system) override - { - // Remember to call the base class initialization! 
- if (!base_t::onAppInitialized(std::move(system))) - return false; - - m_window = m_winMgr->createWindow(getWindowCreationParams()); - m_surface = video::CSurfaceVulkanWin32::create(core::smart_refctd_ptr(m_api),core::smart_refctd_ptr_static_cast(m_window)); - return true; - } - - virtual core::vector getSurfaces() const - { - return {{m_surface.get()/*,EQF_NONE*/}}; - } - - virtual bool keepRunning() override - { - if (!m_window || reinterpret_cast(m_window->getEventCallback())->windowGotClosed()) - return false; - - return true; - } - - protected: - virtual ui::IWindow::SCreationParams getWindowCreationParams() const - { - ui::IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = 640; - params.height = 480; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_NONE; - params.windowCaption = "SingleNonResizableWindowApplication"; - return params; - } - - core::smart_refctd_ptr m_window; - core::smart_refctd_ptr m_surface; -}; -} - -======= ->>>>>>> vulkan_1_3 #include "nbl/video/CVulkanSwapchain.h" using namespace nbl; From 04ca9e27dd7980d552939d74021f2790eff17622 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 4 Mar 2024 19:37:22 -0300 Subject: [PATCH 09/12] Fix lack of onAppTerminated --- 67_SubAllocatedDescriptorSet/main.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index 93c7c486d..87231a931 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -231,6 +231,11 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio assert(statusCode == IQueue::RESULT::SUCCESS); } } + + bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } }; NBL_MAIN_FUNC(SubAllocatedDescriptorSetApp) \ No newline at end of file From ffb014e5a686fc2955d8858afda12294bf3c176f Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Mon, 4 Mar 
2024 20:17:18 -0300 Subject: [PATCH 10/12] Example PR reviews --- 67_SubAllocatedDescriptorSet/main.cpp | 34 ++++---------------- 67_SubAllocatedDescriptorSet/pipeline.groovy | 6 ++-- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index 87231a931..a4352f1a2 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -3,7 +3,6 @@ // For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/video/surface/CSurfaceVulkan.h" #include "nbl/video/alloc/SubAllocatedDescriptorSet.h" #include "../common/BasicMultiQueueApplication.hpp" @@ -16,50 +15,37 @@ using namespace ui; using namespace asset; using namespace video; -#include "nbl/builtin/hlsl/bit.hlsl" - -// In this application we'll cover buffer streaming, Buffer Device Address (BDA) and push constants class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplication, public examples::MonoAssetManagerAndBuiltinResourceApplication { using device_base_t = examples::MonoDeviceApplication; using asset_base_t = examples::MonoAssetManagerAndBuiltinResourceApplication; - // The pool cache is just a formalized way of round-robining command pools and resetting + reusing them after their most recent submit signals finished. - // Its a little more ergonomic to use if you don't have a 1:1 mapping between frames and pools. smart_refctd_ptr m_poolCache; - smart_refctd_ptr m_subAllocDescriptorSet; - // This example really lets the advantages of a timeline semaphore shine through! 
smart_refctd_ptr m_timeline; uint64_t m_iteration = 0; constexpr static inline uint64_t MaxIterations = 200; + constexpr static uint32_t AllocatedBinding = 0; public: - // Yay thanks to multiple inheritance we cannot forward ctors anymore SubAllocatedDescriptorSetApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : system::IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {} - // we stuff all our work here because its a "single shot" app bool onAppInitialized(smart_refctd_ptr&& system) override { using nbl::video::IGPUDescriptorSetLayout; - // Remember to call the base class initialization! if (!device_base_t::onAppInitialized(std::move(system))) return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; - // We'll allow subsequent iterations to overlap each other on the GPU, the only limiting factors are - // the amount of memory in the streaming buffers and the number of commandpools we can use simultaenously. 
constexpr auto MaxConcurrency = 64; - // Since this time we don't throw the Command Pools away and we'll reset them instead, we don't create the pools with the transient flag m_poolCache = ICommandPoolCache::create(core::smart_refctd_ptr(m_device),getComputeQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::NONE,MaxConcurrency); - // In contrast to fences, we just need one semaphore to rule all dispatches m_timeline = m_device->createSemaphore(m_iteration); // Descriptor set sub allocator @@ -142,13 +128,14 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio } { - auto allocNum = subAllocatedDescriptorSet->multi_allocate(0, allocation.size(), descriptors.data(), descriptorWrites.data(), allocation.data()); + auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation.size(), descriptors.data(), descriptorWrites.data(), allocation.data()); assert(allocNum == 0); m_device->updateDescriptorSets(descriptorWrites, {}); for (uint32_t i = 0; i < allocation.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); - assert(allocation[i] != core::PoolAddressAllocator::invalid_address); + if (allocation[i] == core::PoolAddressAllocator::invalid_address) + return logFail("value at %d wasn't allocated", i); } } { @@ -162,28 +149,26 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio m_logger->log("freed half the descriptors", system::ILogger::ELL_INFO); std::vector allocation2(128, core::PoolAddressAllocator::invalid_address); { - auto allocNum = subAllocatedDescriptorSet->multi_allocate(0, allocation2.size(), descriptors.data(), descriptorWrites.data(), &allocation2[0]); + auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation2.size(), descriptors.data(), descriptorWrites.data(), &allocation2[0]); assert(allocNum == 0); m_device->updateDescriptorSets(descriptorWrites, {}); for (uint32_t i = 0; i < 
allocation2.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation2[i]); - assert(allocation2[i] != core::PoolAddressAllocator::invalid_address); + if (allocation2[i] == core::PoolAddressAllocator::invalid_address) + return logFail("value at %d wasn't allocated", i); } } return true; } - // Ok this time we'll actually have a work loop (maybe just for the sake of future WASM so we don't timeout a Browser Tab with an unresponsive script) bool keepRunning() override { return m_iteration cmdbuf; { m_poolCache->getPool(poolIx)->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1},core::smart_refctd_ptr(m_logger)); - // lets record, its still a one time submit because we have to re-record with different push constants each time cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); // COMMAND RECORDING @@ -215,10 +199,6 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio .value = m_iteration, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT }; - // Generally speaking we don't need to wait on any semaphore because in this example every dispatch gets its own clean piece of memory to use - // from the point of view of the GPU. Implicit domain operations between Host and Device happen upon a submit and a semaphore/fence signal operation, - // this ensures we can touch the input and get accurate values from the output memory using the CPU before and after respectively, each submit becoming PENDING. 
- // If we actually cared about this submit seeing the memory accesses of a previous dispatch we could add a semaphore wait const IQueue::SSubmitInfo submitInfo = { .waitSemaphores = {}, .commandBuffers = {&cmdbufInfo,1}, diff --git a/67_SubAllocatedDescriptorSet/pipeline.groovy b/67_SubAllocatedDescriptorSet/pipeline.groovy index 1a7b043a4..4d7b41369 100644 --- a/67_SubAllocatedDescriptorSet/pipeline.groovy +++ b/67_SubAllocatedDescriptorSet/pipeline.groovy @@ -2,9 +2,9 @@ import org.DevshGraphicsProgramming.Agent import org.DevshGraphicsProgramming.BuilderInfo import org.DevshGraphicsProgramming.IBuilder -class CStreamingAndBufferDeviceAddressBuilder extends IBuilder +class CSubAllocatedDescriptorSetBuilder extends IBuilder { - public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info) + public CSubAllocatedDescriptorSetBuilder(Agent _agent, _info) { super(_agent, _info) } @@ -44,7 +44,7 @@ class CStreamingAndBufferDeviceAddressBuilder extends IBuilder def create(Agent _agent, _info) { - return new CStreamingAndBufferDeviceAddressBuilder(_agent, _info) + return new CSubAllocatedDescriptorSetBuilder(_agent, _info) } return this \ No newline at end of file From 9b6764f885374b97c841f6f9e188505ca9d24601 Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Thu, 7 Mar 2024 23:55:09 -0300 Subject: [PATCH 11/12] Fix up example --- 67_SubAllocatedDescriptorSet/main.cpp | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index a4352f1a2..b7ec7bbdf 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -118,19 +118,11 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr(core::smart_refctd_ptr(descriptorSet), 
core::smart_refctd_ptr(m_device)); std::vector allocation(128, core::PoolAddressAllocator::invalid_address); - std::vector descriptors; - std::vector descriptorWrites(allocation.size(), video::IGPUDescriptorSet::SWriteDescriptorSet{}); + std::vector descriptorDrops(allocation.size(), video::IGPUDescriptorSet::SDropDescriptorSet{}); - for (uint32_t i = 0; i < allocation.size(); i++) { - auto descriptorInfo = createImageDescriptor(80, 80); - descriptors.push_back(descriptorInfo); - } - - { - auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation.size(), descriptors.data(), descriptorWrites.data(), allocation.data()); + auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation.size(), allocation.data()); assert(allocNum == 0); - m_device->updateDescriptorSets(descriptorWrites, {}); for (uint32_t i = 0; i < allocation.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); @@ -144,14 +136,13 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio { addr.push_back(allocation[i]); } - subAllocatedDescriptorSet->multi_deallocate(0, addr.size(), &addr[0]); + subAllocatedDescriptorSet->multi_deallocate(descriptorDrops.data(), AllocatedBinding, addr.size(), addr.data()); } m_logger->log("freed half the descriptors", system::ILogger::ELL_INFO); std::vector allocation2(128, core::PoolAddressAllocator::invalid_address); { - auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation2.size(), descriptors.data(), descriptorWrites.data(), &allocation2[0]); + auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation2.size(), allocation2.data()); assert(allocNum == 0); - m_device->updateDescriptorSets(descriptorWrites, {}); for (uint32_t i = 0; i < allocation2.size(); i++) { m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation2[i]); From 
7906d1cac91881862aca8295bab0726f76350fec Mon Sep 17 00:00:00 2001 From: deprilula28 Date: Tue, 12 Mar 2024 00:38:56 -0300 Subject: [PATCH 12/12] Use example with multi timeline stuff --- 67_SubAllocatedDescriptorSet/main.cpp | 212 ++++++++++++++++++-------- 1 file changed, 145 insertions(+), 67 deletions(-) diff --git a/67_SubAllocatedDescriptorSet/main.cpp b/67_SubAllocatedDescriptorSet/main.cpp index b7ec7bbdf..ace25bd30 100644 --- a/67_SubAllocatedDescriptorSet/main.cpp +++ b/67_SubAllocatedDescriptorSet/main.cpp @@ -8,6 +8,8 @@ #include "../common/BasicMultiQueueApplication.hpp" #include "../common/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + using namespace nbl; using namespace core; using namespace system; @@ -26,12 +28,116 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio smart_refctd_ptr m_timeline; uint64_t m_iteration = 0; constexpr static inline uint64_t MaxIterations = 200; + constexpr static inline uint64_t MaxDescriptors = 512; + constexpr static inline uint64_t MaxAllocPerFrame = 10; constexpr static uint32_t AllocatedBinding = 0; + smart_refctd_ptr m_descriptorImages[MaxDescriptors]; + smart_refctd_ptr m_descriptorBuffers[MaxDescriptors]; public: SubAllocatedDescriptorSetApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : system::IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {} + bool writeDescriptors(uint32_t count, uint32_t* valueIndices, uint32_t* allocationIndex) + { + auto createImageDescriptor = [&](uint32_t width, uint32_t height) + { + auto image = m_device->createImage(nbl::video::IGPUImage::SCreationParams { + { + .type = nbl::video::IGPUImage::E_TYPE::ET_2D, + .samples = nbl::video::IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .format = nbl::asset::E_FORMAT::EF_R8G8B8A8_UNORM, + .extent = { width, height, 1 }, + .mipLevels = 1, + 
.arrayLayers = 1, + .usage = nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT + | nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT + | nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT, + }, {}, nbl::video::IGPUImage::TILING::LINEAR, + }); + + auto reqs = image->getMemoryReqs(); + reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(reqs, image.get()); + + auto imageView = m_device->createImageView(nbl::video::IGPUImageView::SCreationParams { + .image = image, + .viewType = nbl::video::IGPUImageView::E_TYPE::ET_2D, + .format = nbl::asset::E_FORMAT::EF_R8G8B8A8_UNORM, + // .subresourceRange = { nbl::video::IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, 0, 1, 0, 1 }, + }); + + return imageView; + }; + + auto createBufferDescriptor = [&](uint32_t size) + { + nbl::video::IGPUBuffer::SCreationParams params; + { + params.size = size; + params.usage = nbl::video::IGPUBuffer::E_USAGE_FLAGS::EUF_STORAGE_BUFFER_BIT + | nbl::video::IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT + | nbl::video::IGPUBuffer::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT; + } + auto buffer = m_device->createBuffer(std::move(params)); + + auto reqs = buffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(reqs, buffer.get()); + + return buffer; + }; + + + std::vector descriptorWrites; + descriptorWrites.reserve(count); + std::vector descriptorInfos; + { + for (uint32_t i = 0; i < count; i++) + { + auto index = valueIndices[i]; + m_logger->log("writeDescriptors[%d]: allocation[%d]: %d", system::ILogger::ELL_INFO, i, index, allocationIndex[i]); + if (allocationIndex[i] == core::PoolAddressAllocator::invalid_address) + return logFail("value at %d wasn't allocated", i); + + auto allocationIdx = allocationIndex[i]; + + video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo; + + // Storage image + { + m_descriptorImages[index] = createImageDescriptor(256, 256); 
+ descriptorInfo.desc = core::smart_refctd_ptr(m_descriptorImages[index]); + descriptorInfo.info.image.imageLayout = asset::IImage::LAYOUT::GENERAL; + } + // Storage buffer + //{ + // m_descriptorBuffers[index] = createBufferDescriptor(1024); + // descriptorInfo.desc = core::smart_refctd_ptr(m_descriptorBuffers[index]); + // descriptorInfo.info.buffer.offset = 0u; + // descriptorInfo.info.buffer.size = 1024u; + //} + + descriptorInfos.push_back(descriptorInfo); + } + for (uint32_t i = 0; i < count; i++) + { + auto index = valueIndices[i]; + auto allocationIdx = allocationIndex[i]; + + video::IGPUDescriptorSet::SWriteDescriptorSet write; + write.dstSet = m_subAllocDescriptorSet->getDescriptorSet(); + write.binding = AllocatedBinding; + write.arrayElement = index; + write.count = 1u; + write.info = &descriptorInfos[i]; + descriptorWrites.push_back(write); + } + } + + m_device->updateDescriptorSets(descriptorWrites, {}); + } + bool onAppInitialized(smart_refctd_ptr&& system) override { using nbl::video::IGPUDescriptorSetLayout; @@ -55,7 +161,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio for (uint32_t i = 0; i < 12; i++) { bindings[i].binding = i; - bindings[i].count = 512; + bindings[i].count = MaxDescriptors; bindings[i].createFlags = core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT) | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; @@ -81,75 +187,21 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio auto descriptorSet = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(descriptorSetLayout)); - auto createImageDescriptor = [&](uint32_t width, uint32_t height) - { - auto image = m_device->createImage(nbl::video::IGPUImage::SCreationParams { - { - .type = nbl::video::IGPUImage::E_TYPE::ET_2D, - .samples = 
nbl::video::IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, - .format = nbl::asset::E_FORMAT::EF_R8G8B8A8_UNORM, - .extent = { width, height, 1 }, - .mipLevels = 1, - .arrayLayers = 1, - .usage = nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT - | nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_DST_BIT - | nbl::video::IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT, - }, {}, nbl::video::IGPUImage::TILING::LINEAR, - }); - - auto reqs = image->getMemoryReqs(); - reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - m_device->allocate(reqs, image.get()); - - auto imageView = m_device->createImageView(nbl::video::IGPUImageView::SCreationParams { - .image = image, - .viewType = nbl::video::IGPUImageView::E_TYPE::ET_2D, - .format = nbl::asset::E_FORMAT::EF_R8G8B8A8_UNORM, - // .subresourceRange = { nbl::video::IGPUImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, 0, 1, 0, 1 }, - }); - - video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo = {}; - descriptorInfo.desc = imageView; - descriptorInfo.info.image.imageLayout = asset::IImage::LAYOUT::GENERAL; - - return descriptorInfo; - }; - // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1) auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr(core::smart_refctd_ptr(descriptorSet), core::smart_refctd_ptr(m_device)); - std::vector allocation(128, core::PoolAddressAllocator::invalid_address); - std::vector descriptorDrops(allocation.size(), video::IGPUDescriptorSet::SDropDescriptorSet{}); + //std::vector allocation(MaxDescriptors, core::PoolAddressAllocator::invalid_address); - { - auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation.size(), allocation.data()); - assert(allocNum == 0); - for (uint32_t i = 0; i < allocation.size(); i++) - { - m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation[i]); - if (allocation[i] == core::PoolAddressAllocator::invalid_address) - 
return logFail("value at %d wasn't allocated", i); - } - } - { - std::vector addr; - for (uint32_t i = 0; i < allocation.size(); i+=2) - { - addr.push_back(allocation[i]); - } - subAllocatedDescriptorSet->multi_deallocate(descriptorDrops.data(), AllocatedBinding, addr.size(), addr.data()); - } - m_logger->log("freed half the descriptors", system::ILogger::ELL_INFO); - std::vector allocation2(128, core::PoolAddressAllocator::invalid_address); - { - auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation2.size(), allocation2.data()); - assert(allocNum == 0); - for (uint32_t i = 0; i < allocation2.size(); i++) - { - m_logger->log("allocation[%d]: %d", system::ILogger::ELL_INFO, i, allocation2[i]); - if (allocation2[i] == core::PoolAddressAllocator::invalid_address) - return logFail("value at %d wasn't allocated", i); - } - } + //std::vector indices; + //indices.reserve(MaxDescriptors); + //for (uint32_t i = 0; i < MaxDescriptors; i++) + // indices.push_back(i); + + //auto allocNum = subAllocatedDescriptorSet->multi_allocate(AllocatedBinding, allocation.size(), allocation.data()); + //assert(allocNum == 0); + m_subAllocDescriptorSet = std::move(subAllocatedDescriptorSet); + + //bool response = writeDescriptors(allocation.size(), indices.data(), allocation.data()); + //if (!response) return false; return true; } @@ -160,6 +212,27 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio { IQueue* const queue = getComputeQueue(); + // Similar idea to example 05 (streaming buffers) + // We will be allocating and freeing stuff, latched on previous frame's timeline semaphore + auto rng = nbl::hlsl::Xoroshiro64StarStar::construct({ m_iteration ^ 0xdeadbeefu,std::hash()(_NBL_APP_NAME_) }); + const auto elementCount = rng() % MaxAllocPerFrame; + m_logger->log("elementCount: %d", system::ILogger::ELL_INFO, elementCount); + + std::vector values(elementCount, SubAllocatedDescriptorSet::invalid_value); + + { + 
std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); + m_subAllocDescriptorSet->multi_allocate(waitTill, AllocatedBinding, elementCount, values.data()); + + std::vector indices; + indices.reserve(elementCount); + for (uint32_t i = 0; i < elementCount; i++) + indices.push_back(i); + + bool response = writeDescriptors(elementCount, indices.data(), values.data()); + assert(response); + } + uint32_t poolIx; do { @@ -172,6 +245,7 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); // COMMAND RECORDING + // Here we would hypothetically use the descriptors created above auto result = cmdbuf->end(); assert(result); @@ -201,6 +275,10 @@ class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplicatio queue->endCapture(); assert(statusCode == IQueue::RESULT::SUCCESS); } + + const ISemaphore::SWaitInfo futureWait = {m_timeline.get(),m_iteration}; + m_poolCache->releasePool(futureWait,poolIx); + m_subAllocDescriptorSet->multi_deallocate(AllocatedBinding, elementCount, values.data(), futureWait); } bool onAppTerminated() override