Skip to content

Commit

Permalink
Parallel pipeline initialization
Browse files Browse the repository at this point in the history
  • Loading branch information
sergcpp committed Dec 3, 2024
1 parent 6c813f5 commit a03990b
Show file tree
Hide file tree
Showing 27 changed files with 872 additions and 1,053 deletions.
8 changes: 5 additions & 3 deletions Ray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,14 @@ extern const std::pair<uint32_t, const char *> KnownGPUVendors[] = {
extern const int KnownGPUVendorsCount = 4;
} // namespace Ray

Ray::RendererBase *Ray::CreateRenderer(const settings_t &s, ILog *log, const Bitmask<eRendererType> enabled_types) {
Ray::RendererBase *Ray::CreateRenderer(const settings_t &s, ILog *log,
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for,
const Bitmask<eRendererType> enabled_types) {
#if defined(ENABLE_VK_IMPL)
if (enabled_types & eRendererType::Vulkan) {
log->Info("Ray: Creating Vulkan renderer %ix%i", s.w, s.h);
try {
return Vk::CreateRenderer(s, log);
return Vk::CreateRenderer(s, log, parallel_for);
} catch (std::exception &e) {
log->Info("Ray: Failed to create Vulkan renderer, %s", e.what());
}
Expand All @@ -66,7 +68,7 @@ Ray::RendererBase *Ray::CreateRenderer(const settings_t &s, ILog *log, const Bit
if (enabled_types & eRendererType::DirectX12) {
log->Info("Ray: Creating DirectX12 renderer %ix%i", s.w, s.h);
try {
return Dx::CreateRenderer(s, log);
return Dx::CreateRenderer(s, log, parallel_for);
} catch (std::exception &e) {
log->Info("Ray: Failed to create DirectX12 renderer, %s", e.what());
}
Expand Down
6 changes: 4 additions & 2 deletions Ray.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ const Bitmask<eRendererType> DefaultEnabledRenderTypes =
/** @brief Creates renderer
@param s renderer settings (Ray.cpp logs s.w and s.h as the renderer dimensions)
@param log output log, defaults to g_null_log
@param parallel_for callable with signature void(int, int, ParallelForFunction &&) that
Ray.cpp forwards to the Vulkan and DirectX12 backend factories, defaults to parallel_for_serial
@param enabled_types mask of renderer types that may be created (Ray.cpp tests it against
eRendererType::Vulkan and eRendererType::DirectX12), defaults to DefaultEnabledRenderTypes
@return pointer to created renderer
*/
RendererBase *CreateRenderer(const settings_t &s, ILog *log = &g_null_log,
Bitmask<eRendererType> enabled_types = DefaultEnabledRenderTypes);
RendererBase *
CreateRenderer(const settings_t &s, ILog *log = &g_null_log,
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for = parallel_for_serial,
Bitmask<eRendererType> enabled_types = DefaultEnabledRenderTypes);

/** @brief Queries available GPU devices
@param log output log
Expand Down
4 changes: 3 additions & 1 deletion RendererBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,8 @@ class RendererBase {
@param alias_memory enable tensor memory aliasing (to lower memory usage)
@param out_props output filter properties
*/
virtual void InitUNetFilter(bool alias_memory, unet_filter_properties_t &out_props) = 0;
virtual unet_filter_properties_t
InitUNetFilter(bool alias_memory,
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for = parallel_for_serial) = 0;
};
} // namespace Ray
9 changes: 7 additions & 2 deletions internal/RendererCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,9 @@ template <typename SIMDPolicy> class Renderer : public RendererBase, private SIM
// Copies the renderer's current stats_ member into the caller-provided st.
void GetStats(stats_t &st) override { st = stats_; }
// Resets the stats_ member by assigning {0} to it.
void ResetStats() override { stats_ = {0}; }

void InitUNetFilter(bool alias_memory, unet_filter_properties_t &out_props) override;
unet_filter_properties_t
InitUNetFilter(bool alias_memory,
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for) override;
};
} // namespace Cpu
namespace Ref {
Expand Down Expand Up @@ -1250,21 +1252,24 @@ void Ray::Cpu::Renderer<SIMDPolicy>::UpdateFilterTable(ePixelFilter filter, floa
}

// Sets up the UNet filter state of the CPU renderer (weights, offsets, aliasing flag)
// and reports the filter properties: the pass count and, for every pass, its alias
// dependencies (unused slots are set to -1).
//   alias_memory - stored into unet_alias_memory_ before UpdateUNetFilterMemory() runs
//   parallel_for - not referenced anywhere in the body shown here
template <typename SIMDPolicy>
void Ray::Cpu::Renderer<SIMDPolicy>::InitUNetFilter(const bool alias_memory, unet_filter_properties_t &out_props) {
Ray::unet_filter_properties_t Ray::Cpu::Renderer<SIMDPolicy>::InitUNetFilter(
const bool alias_memory, const std::function<void(int, int, ParallelForFunction &&)> &parallel_for) {
// First call passes null outputs; its return value is used as the element count for unet_weights_
const int total_count = SetupUNetWeights<float>(true, 1, nullptr, nullptr);
unet_weights_.resize(total_count);
// Second call receives the offsets structure and the freshly sized weight storage
SetupUNetWeights(true, 1, &unet_offsets_, unet_weights_.data());

// The flag is set before the memory update (presumably because the update reads it - TODO confirm)
unet_alias_memory_ = alias_memory;
UpdateUNetFilterMemory();

unet_filter_properties_t out_props;
out_props.pass_count = UNetFilterPasses;
for (int i = 0; i < UNetFilterPasses; ++i) {
// Mark the first four dependency slots of this pass as unused (-1) ...
std::fill(&out_props.alias_dependencies[i][0], &out_props.alias_dependencies[i][0] + 4, -1);
// ... then overwrite the leading slots with the dependencies recorded for the pass
for (int j = 0; j < int(unet_alias_dependencies_[i].size()); ++j) {
out_props.alias_dependencies[i][j] = unet_alias_dependencies_[i][j];
}
}
return out_props;
}

template <typename SIMDPolicy> void Ray::Cpu::Renderer<SIMDPolicy>::UpdateUNetFilterMemory() {
Expand Down
744 changes: 334 additions & 410 deletions internal/RendererDX.cpp

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion internal/RendererDX.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

namespace Ray {
namespace Dx {
// Factory for the DirectX12 renderer. Ray::CreateRenderer (Ray.cpp) calls the form taking
// parallel_for inside a try block and logs e.what() when a std::exception is thrown.
RendererBase *CreateRenderer(const settings_t &s, ILog *log);
RendererBase *CreateRenderer(const settings_t &s, ILog *log,
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);
} // namespace Dx
} // namespace Ray
137 changes: 72 additions & 65 deletions internal/RendererGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,65 +47,63 @@ class Renderer : public RendererBase {
std::unique_ptr<Context> ctx_;
GpuCommandBuffer external_cmd_buf_;

Shader sh_prim_rays_gen_simple_, sh_prim_rays_gen_adaptive_;
Shader sh_intersect_scene_, sh_intersect_scene_indirect_, sh_intersect_area_lights_;
Shader sh_shade_primary_, sh_shade_primary_sky_, sh_shade_primary_cache_update_, sh_shade_primary_cache_query_,
sh_shade_primary_cache_query_sky_, sh_shade_secondary_, sh_shade_secondary_sky_,
sh_shade_secondary_cache_update_, sh_shade_secondary_cache_query_, sh_shade_secondary_cache_query_sky_;
Shader sh_shade_sky_;
Shader sh_intersect_scene_shadow_, sh_prepare_indir_args_, sh_mix_incremental_, sh_postprocess_,
sh_filter_variance_, sh_nlm_filter_, sh_debug_rt_;
Shader sh_sort_hash_rays_, sh_sort_init_count_table_, sh_sort_reduce_, sh_sort_scan_, sh_sort_scan_add_,
sh_sort_scatter_, sh_sort_reorder_rays_;
Shader sh_intersect_scene_rgen_, sh_intersect_scene_rchit_, sh_intersect_scene_rmiss_,
sh_intersect_scene_indirect_rgen_;
Shader sh_convolution_Img_9_32_, sh_convolution_32_32_Downsample_, sh_convolution_32_48_Downsample_,
sh_convolution_48_64_Downsample_, sh_convolution_64_80_Downsample_, sh_convolution_64_64_,
sh_convolution_64_32_, sh_convolution_80_96_, sh_convolution_96_96_, sh_convolution_112_112_,
sh_convolution_concat_96_64_112_, sh_convolution_concat_112_48_96_, sh_convolution_concat_96_32_64_,
sh_convolution_concat_64_3_64_, sh_convolution_concat_64_6_64_, sh_convolution_concat_64_9_64_,
sh_convolution_32_3_img_;
Shader sh_spatial_cache_update_, sh_spatial_cache_resolve_;

Program prog_prim_rays_gen_simple_, prog_prim_rays_gen_adaptive_;
Program prog_intersect_scene_, prog_intersect_scene_indirect_, prog_intersect_area_lights_;
Program prog_shade_primary_, prog_shade_primary_sky_, prog_shade_primary_cache_update_,
prog_shade_primary_cache_query_, prog_shade_primary_cache_query_sky_, prog_shade_secondary_,
prog_shade_secondary_sky_, prog_shade_secondary_cache_update_, prog_shade_secondary_cache_query_,
prog_shade_secondary_cache_query_sky_;
Program prog_shade_sky_;
Program prog_intersect_scene_shadow_, prog_prepare_indir_args_, prog_mix_incremental_, prog_postprocess_,
prog_filter_variance_, prog_nlm_filter_, prog_debug_rt_;
Program prog_sort_hash_rays_, prog_sort_init_count_table_, prog_sort_reduce_, prog_sort_scan_, prog_sort_scan_add_,
prog_sort_scatter_, prog_sort_reorder_rays_;
Program prog_intersect_scene_rtpipe_, prog_intersect_scene_indirect_rtpipe_;
Program prog_convolution_Img_9_32_, prog_convolution_32_32_Downsample_, prog_convolution_32_48_Downsample_,
prog_convolution_48_64_Downsample_, prog_convolution_64_80_Downsample_, prog_convolution_64_64_,
prog_convolution_64_32_, prog_convolution_80_96_, prog_convolution_96_96_, prog_convolution_112_112_,
prog_convolution_concat_96_64_112_, prog_convolution_concat_112_48_96_, prog_convolution_concat_96_32_64_,
prog_convolution_concat_64_3_64_, prog_convolution_concat_64_6_64_, prog_convolution_concat_64_9_64_,
prog_convolution_32_3_img_;
Program prog_spatial_cache_update_, prog_spatial_cache_resolve_;

Pipeline pi_prim_rays_gen_simple_, pi_prim_rays_gen_adaptive_;
Pipeline pi_intersect_scene_, pi_intersect_scene_indirect_, pi_intersect_area_lights_;
Pipeline pi_shade_primary_, pi_shade_primary_sky_, pi_shade_primary_cache_update_, pi_shade_primary_cache_query_,
pi_shade_primary_cache_query_sky_, pi_shade_secondary_, pi_shade_secondary_sky_,
pi_shade_secondary_cache_update_, pi_shade_secondary_cache_query_, pi_shade_secondary_cache_query_sky_;
Pipeline pi_shade_sky_;
Pipeline pi_intersect_scene_shadow_, pi_prepare_indir_args_, pi_mix_incremental_, pi_postprocess_,
pi_filter_variance_, pi_nlm_filter_, pi_debug_rt_;
Pipeline pi_sort_hash_rays_, pi_sort_init_count_table_, pi_sort_reduce_, pi_sort_scan_, pi_sort_scan_add_,
pi_sort_scatter_, pi_sort_reorder_rays_, pi_intersect_scene_rtpipe_, pi_intersect_scene_indirect_rtpipe_;
Pipeline pi_convolution_Img_9_32_, pi_convolution_32_32_Downsample_, pi_convolution_32_48_Downsample_,
pi_convolution_48_64_Downsample_, pi_convolution_64_80_Downsample_, pi_convolution_64_64_,
pi_convolution_64_32_, pi_convolution_80_96_, pi_convolution_96_96_, pi_convolution_112_112_,
pi_convolution_concat_96_64_112_, pi_convolution_concat_112_48_96_, pi_convolution_concat_96_32_64_,
pi_convolution_concat_64_3_64_, pi_convolution_concat_64_6_64_, pi_convolution_concat_64_9_64_,
pi_convolution_32_3_img_;
Pipeline pi_spatial_cache_update_, pi_spatial_cache_resolve_;

bool InitShaders(ILog *log);
// Shader objects of the renderer, grouped under one member so that they can be reset
// together: InitUNetFilter assigns sh_ = {} ("release shader modules") after
// InitUNetFilterPipelines succeeds.
struct {
Shader prim_rays_gen_simple, prim_rays_gen_adaptive;
Shader intersect_scene, intersect_scene_indirect, intersect_area_lights;
Shader shade_primary, shade_primary_sky, shade_primary_cache_update, shade_primary_cache_query,
shade_primary_cache_query_sky, shade_secondary, shade_secondary_sky, shade_secondary_cache_update,
shade_secondary_cache_query, shade_secondary_cache_query_sky;
Shader shade_sky;
Shader intersect_scene_shadow, prepare_indir_args, mix_incremental, postprocess, filter_variance, nlm_filter,
debug_rt;
Shader sort_hash_rays, sort_init_count_table, sort_reduce, sort_scan, sort_scan_add, sort_scatter,
sort_reorder_rays;
Shader intersect_scene_rgen, intersect_scene_rchit, intersect_scene_rmiss, intersect_scene_indirect_rgen;
// Convolution shaders (presumably the UNet filter passes - TODO confirm)
Shader convolution_Img_9_32, convolution_32_32_Downsample, convolution_32_48_Downsample,
convolution_48_64_Downsample, convolution_64_80_Downsample, convolution_64_64, convolution_64_32,
convolution_80_96, convolution_96_96, convolution_112_112, convolution_concat_96_64_112,
convolution_concat_112_48_96, convolution_concat_96_32_64, convolution_concat_64_9_64, convolution_32_3_img;
Shader spatial_cache_update, spatial_cache_resolve;
} sh_;

// Program objects of the renderer, grouped under one member. Member names follow the
// shader names in sh_, except that the rgen/rchit/rmiss shaders have *_rtpipe
// counterparts here (presumably one program per ray tracing pipeline - TODO confirm).
struct {
Program prim_rays_gen_simple, prim_rays_gen_adaptive;
Program intersect_scene, intersect_scene_indirect, intersect_area_lights;
Program shade_primary, shade_primary_sky, shade_primary_cache_update, shade_primary_cache_query,
shade_primary_cache_query_sky, shade_secondary, shade_secondary_sky, shade_secondary_cache_update,
shade_secondary_cache_query, shade_secondary_cache_query_sky;
Program shade_sky;
Program intersect_scene_shadow, prepare_indir_args, mix_incremental, postprocess, filter_variance, nlm_filter,
debug_rt;
Program sort_hash_rays, sort_init_count_table, sort_reduce, sort_scan, sort_scan_add, sort_scatter,
sort_reorder_rays;
Program intersect_scene_rtpipe, intersect_scene_indirect_rtpipe;
Program convolution_Img_9_32, convolution_32_32_Downsample, convolution_32_48_Downsample,
convolution_48_64_Downsample, convolution_64_80_Downsample, convolution_64_64, convolution_64_32,
convolution_80_96, convolution_96_96, convolution_112_112, convolution_concat_96_64_112,
convolution_concat_112_48_96, convolution_concat_96_32_64, convolution_concat_64_9_64, convolution_32_3_img;
Program spatial_cache_update, spatial_cache_resolve;
} prog_;

// Pipeline objects of the renderer, grouped under one member.
// InitUNetFilter tests pi_.convolution_Img_9_32 to decide whether
// InitUNetFilterPipelines still has to be called.
struct {
Pipeline prim_rays_gen_simple, prim_rays_gen_adaptive;
Pipeline intersect_scene, intersect_scene_indirect, intersect_area_lights;
Pipeline shade_primary, shade_primary_sky, shade_primary_cache_update, shade_primary_cache_query,
shade_primary_cache_query_sky, shade_secondary, shade_secondary_sky, shade_secondary_cache_update,
shade_secondary_cache_query, shade_secondary_cache_query_sky;
Pipeline shade_sky;
Pipeline intersect_scene_shadow, prepare_indir_args, mix_incremental, postprocess, filter_variance, nlm_filter,
debug_rt;
Pipeline sort_hash_rays, sort_init_count_table, sort_reduce, sort_scan, sort_scan_add, sort_scatter,
sort_reorder_rays, intersect_scene_rtpipe, intersect_scene_indirect_rtpipe;
Pipeline convolution_Img_9_32, convolution_32_32_Downsample, convolution_32_48_Downsample,
convolution_48_64_Downsample, convolution_64_80_Downsample, convolution_64_64, convolution_64_32,
convolution_80_96, convolution_96_96, convolution_112_112, convolution_concat_96_64_112,
convolution_concat_112_48_96, convolution_concat_96_32_64, convolution_concat_64_9_64, convolution_32_3_img;
Pipeline spatial_cache_update, spatial_cache_resolve;
} pi_;

bool InitPipelines(ILog *log, const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);

int w_ = 0, h_ = 0;
bool use_hwrt_ = false, use_bindless_ = false, use_tex_compression_ = false, use_fp16_ = false,
Expand Down Expand Up @@ -146,7 +144,7 @@ class Renderer : public RendererBase {
Buffer unet_tensors_heap_;
unet_filter_tensors_t unet_tensors_ = {};
SmallVector<int, 2> unet_alias_dependencies_[UNetFilterPasses];
bool InitUNetFilterPipelines();
bool InitUNetFilterPipelines(const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);
void UpdateUNetFilterMemory(CommandBuffer cmd_buf);

struct {
Expand Down Expand Up @@ -314,7 +312,7 @@ class Renderer : public RendererBase {
color_data_rgba_t get_pixels_ref(bool tonemap) const;

public:
Renderer(const settings_t &s, ILog *log);
Renderer(const settings_t &s, ILog *log, const std::function<void(int, int, ParallelForFunction &&)> &parallel_for);
~Renderer() override;

eRendererType type() const override;
Expand Down Expand Up @@ -363,7 +361,9 @@ class Renderer : public RendererBase {
// Copies the renderer's current stats_ member into the caller-provided st.
void GetStats(stats_t &st) override { st = stats_; }
// Resets the stats_ member by assigning {0} to it.
void ResetStats() override { stats_ = {0}; }

void InitUNetFilter(bool alias_memory, unet_filter_properties_t &out_props) override;
unet_filter_properties_t
InitUNetFilter(bool alias_memory,
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for) override;
};
} // namespace NS
} // namespace Ray
Expand Down Expand Up @@ -526,7 +526,9 @@ inline void Ray::NS::Renderer::UpdateFilterTable(CommandBuffer cmd_buf, const eP
CopyBufferToBuffer(stage_buf, 0, filter_table_, 0, FILTER_TABLE_SIZE * sizeof(float), cmd_buf);
}

inline void Ray::NS::Renderer::InitUNetFilter(const bool alias_memory, unet_filter_properties_t &out_props) {
inline Ray::unet_filter_properties_t
Ray::NS::Renderer::InitUNetFilter(const bool alias_memory,
const std::function<void(int, int, ParallelForFunction &&)> &parallel_for) {
CommandBuffer cmd_buf = BegSingleTimeCommands(ctx_->api(), ctx_->device(), ctx_->temp_command_pool());

Buffer temp_upload_buf;
Expand Down Expand Up @@ -604,6 +606,7 @@ inline void Ray::NS::Renderer::InitUNetFilter(const bool alias_memory, unet_filt
unet_offsets_.dec_conv0_weight *= el_sz;
unet_offsets_.dec_conv0_bias *= el_sz;

unet_filter_properties_t out_props;
out_props.pass_count = UNetFilterPasses;
for (int i = 0; i < UNetFilterPasses; ++i) {
std::fill(&out_props.alias_dependencies[i][0], &out_props.alias_dependencies[i][0] + 4, -1);
Expand All @@ -612,11 +615,15 @@ inline void Ray::NS::Renderer::InitUNetFilter(const bool alias_memory, unet_filt
}
}

if (!pi_convolution_Img_9_32_) {
if (!InitUNetFilterPipelines()) {
if (!pi_.convolution_Img_9_32) {
if (!InitUNetFilterPipelines(parallel_for)) {
throw std::runtime_error("Error initializing pipeline!");
}
// Release shader modules
sh_ = {};
}

return out_props;
}

inline void Ray::NS::Renderer::UpdateUNetFilterMemory(CommandBuffer cmd_buf) {
Expand Down
Loading

0 comments on commit a03990b

Please sign in to comment.