From 1864f7f60ca77df30f440e3a4c52d69e1872088c Mon Sep 17 00:00:00 2001 From: Sergey Yablokov Date: Mon, 22 Apr 2024 16:39:09 +0200 Subject: [PATCH] Fix incorrect depth-normals during cache update (CPU only) --- internal/CoreSIMD.h | 193 ++++++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 95 deletions(-) diff --git a/internal/CoreSIMD.h b/internal/CoreSIMD.h index e0f628bb4..a20f2918c 100644 --- a/internal/CoreSIMD.h +++ b/internal/CoreSIMD.h @@ -1609,8 +1609,8 @@ template force_inline fvec scramble_unorm(const uvec &seed, uvec -void get_scrambled_2d_rand(const uvec &dim, const uvec &seed, const int sample, const uint32_t rand_seq[], - fvec out_val[2]) { +std::array, 2> get_scrambled_2d_rand(const uvec &dim, const uvec &seed, const int sample, + const uint32_t rand_seq[]) { const uvec i_seed = hash_combine(seed, dim), x_seed = hash_combine(seed, 2 * dim + 0u), y_seed = hash_combine(seed, 2 * dim + 1); @@ -1618,8 +1618,10 @@ void get_scrambled_2d_rand(const uvec &dim, const uvec &seed, const int sa const auto shuffled_i = ivec(nested_uniform_scramble_base2(uvec(uint32_t(sample)), i_seed) & (RAND_SAMPLES_COUNT - 1)); + std::array, 2> out_val; out_val[0] = scramble_unorm(x_seed, gather(rand_seq, shuffled_dim * 2 * RAND_SAMPLES_COUNT + 2 * shuffled_i + 0)); out_val[1] = scramble_unorm(y_seed, gather(rand_seq, shuffled_dim * 2 * RAND_SAMPLES_COUNT + 2 * shuffled_i + 1)); + return out_val; } // Gram-Schmidt method @@ -2271,11 +2273,12 @@ template force_inline fvec sin(const fvec &v) { } template -force_inline void calc_alpha(const fvec &roughness, const fvec &anisotropy, const fvec ®ularize_alpha, - fvec out_alpha[2]) { +force_inline std::array, 2> calc_alpha(const fvec &roughness, const fvec &anisotropy, + const fvec ®ularize_alpha) { const fvec roughness2 = sqr(roughness); const fvec aspect = sqrt(1.0f - 0.9f * anisotropy); + std::array, 2> out_alpha; out_alpha[0] = (roughness2 / aspect); out_alpha[1] = (roughness2 * aspect); @@ -2283,6 +2286,8 @@ force_inline void calc_alpha(const fvec &roughness, const fvec &anisotropy clamp(2 * out_alpha[0], 0.25f * regularize_alpha, regularize_alpha); where(out_alpha[1] < regularize_alpha, out_alpha[1]) = clamp(2 * out_alpha[1], 0.25f * regularize_alpha, regularize_alpha); + + return out_alpha; } // @@ -2884,9 +2889,8 @@ void Ray::NS::GeneratePrimaryRays(const camera_t &cam, const rect_t &r, int w, i const uvec px_hash = hash(uvec((ixx << 16) | iyy)); const uvec rand_hash = hash_combine(px_hash, rand_seed); - fvec filter_rand[2]; - get_scrambled_2d_rand(uvec(uint32_t(RAND_DIM_FILTER)), rand_hash, iteration - 1, rand_seq, filter_rand); - + std::array, 2> filter_rand = + get_scrambled_2d_rand(uvec(uint32_t(RAND_DIM_FILTER)), rand_hash, iteration - 1, rand_seq); if (cam.filter != ePixelFilter::Box) { filter_rand[0] *= float(FILTER_TABLE_SIZE - 1); filter_rand[1] *= float(FILTER_TABLE_SIZE - 1); @@ -2911,8 +2915,8 @@ void Ray::NS::GeneratePrimaryRays(const camera_t &cam, const rect_t &r, int w, i fvec offset[2] = {0.0f, 0.0f}; if (cam.fstop > 0.0f) { - fvec lens_rand[2]; - get_scrambled_2d_rand(uvec(uint32_t(RAND_DIM_LENS)), rand_hash, iteration - 1, rand_seq, lens_rand); + const std::array, 2> lens_rand = + get_scrambled_2d_rand(uvec(uint32_t(RAND_DIM_LENS)), rand_hash, iteration - 1, rand_seq); offset[0] = 2.0f * lens_rand[0] - 1.0f; offset[1] = 2.0f * lens_rand[1] - 1.0f; @@ -5104,12 +5108,11 @@ void Ray::NS::IntersectScene(ray_data_t &r, const int min_transp_depth, const }) } - fvec mix_term_rand[2]; - get_scrambled_2d_rand(rand_dim + unsigned(RAND_DIM_BSDF_PICK), rand_hash, iteration - 1, rand_seq, - mix_term_rand); + std::array, 2> mix_term_rand = + get_scrambled_2d_rand(rand_dim + unsigned(RAND_DIM_BSDF_PICK), rand_hash, iteration - 1, rand_seq); - fvec tex_rand[2]; - get_scrambled_2d_rand(rand_dim + RAND_DIM_TEX, rand_hash, iteration - 1, rand_seq, tex_rand); + const std::array, 2> tex_rand = + get_scrambled_2d_rand(rand_dim + RAND_DIM_TEX, rand_hash, iteration - 1, rand_seq); { // resolve material ivec ray_queue[S]; @@ -5139,7 +5142,7 @@ void Ray::NS::IntersectScene(ray_data_t &r, const int min_transp_depth, const const uint32_t first_t = mat->textures[BASE_TEXTURE]; if (first_t != 0xffffffff) { fvec mix[4] = {}; - SampleBilinear(textures, first_t, uvs, {0}, tex_rand, same_mi, mix); + SampleBilinear(textures, first_t, uvs, {0}, tex_rand.data(), same_mi, mix); if (first_t & TEX_YCOCG_BIT) { YCoCg_to_RGB(mix, mix); } @@ -5342,8 +5345,8 @@ void Ray::NS::IntersectScene(const shadow_ray_t &r, const int max_transp_dept }) } - fvec tex_rand[2]; - get_scrambled_2d_rand(rand_dim + RAND_DIM_TEX, rand_hash, iteration - 1, rand_seq, tex_rand); + const std::array, 2> tex_rand = + get_scrambled_2d_rand(rand_dim + RAND_DIM_TEX, rand_hash, iteration - 1, rand_seq); ivec mat_index = gather(reinterpret_cast(sc.tri_materials), tri_index) & ivec((MATERIAL_INDEX_BITS << 16) | MATERIAL_INDEX_BITS); @@ -5391,7 +5394,7 @@ void Ray::NS::IntersectScene(const shadow_ray_t &r, const int max_transp_dept const uint32_t first_t = mat->textures[BASE_TEXTURE]; if (first_t != 0xffffffff) { fvec mix[4] = {}; - SampleBilinear(textures, first_t, sh_uvs, {0}, tex_rand, same_mi, mix); + SampleBilinear(textures, first_t, sh_uvs, {0}, tex_rand.data(), same_mi, mix); if (first_t & TEX_YCOCG_BIT) { YCoCg_to_RGB(mix, mix); } @@ -6658,12 +6661,12 @@ Ray::NS::ivec Ray::NS::Evaluate_GlossyNode(const light_sample_t &ls, const tangent_from_world(surf.T, surf.B, surf.N, ls.L, light_dir_ts); tangent_from_world(surf.T, surf.B, surf.N, H, sampled_normal_ts); - fvec spec_col[4], alpha[2]; - calc_alpha(roughness, fvec{0.0f}, regularize_alpha, alpha); + const std::array, 2> alpha = calc_alpha(roughness, fvec{0.0f}, regularize_alpha); mask &= simd_cast(alpha[0] * alpha[1] >= 1e-7f); - Evaluate_GGXSpecular_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, alpha, fvec{spec_ior}, fvec{spec_F0}, - base_color, base_color, spec_col); + fvec spec_col[4]; + Evaluate_GGXSpecular_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, alpha.data(), fvec{spec_ior}, + fvec{spec_F0}, base_color, base_color, spec_col); const fvec &bsdf_pdf = spec_col[3]; const fvec mis_weight = @@ -6687,12 +6690,11 @@ void Ray::NS::Sample_GlossyNode(const ray_data_t &ray, const ivec &mask, c const fvec base_color[3], const fvec &roughness, const fvec ®ularize_alpha, const fvec &spec_ior, const fvec &spec_F0, const fvec rand[2], const fvec &mix_weight, ray_data_t &new_ray) { - fvec alpha[2]; - calc_alpha(roughness, fvec{0.0f}, regularize_alpha, alpha); + const std::array, 2> alpha = calc_alpha(roughness, fvec{0.0f}, regularize_alpha); fvec V[3], F[4]; - Sample_GGXSpecular_BSDF(surf.T, surf.B, surf.N, ray.d, alpha, spec_ior, spec_F0, base_color, base_color, rand, V, - F); + Sample_GGXSpecular_BSDF(surf.T, surf.B, surf.N, ray.d, alpha.data(), spec_ior, spec_F0, base_color, base_color, + rand, V, F); where(mask, new_ray.depth) = pack_ray_type(RAY_TYPE_SPECULAR); where(mask, new_ray.depth) |= @@ -6725,9 +6727,9 @@ Ray::NS::Evaluate_RefractiveNode(const light_sample_t &ls, const ray_data_t refr_col[4], alpha[2]; - calc_alpha(roughness, fvec{0.0f}, regularize_alpha, alpha); - Evaluate_GGXRefraction_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, alpha, fvec{eta}, base_color, + fvec refr_col[4]; + const std::array, 2> alpha = calc_alpha(roughness, fvec{0.0f}, regularize_alpha); + Evaluate_GGXRefraction_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, alpha.data(), fvec{eta}, base_color, refr_col); const fvec &bsdf_pdf = refr_col[3]; @@ -6756,9 +6758,9 @@ void Ray::NS::Sample_RefractiveNode(const ray_data_t &ray, const ivec &mas const fvec &mix_weight, ray_data_t &new_ray) { const fvec eta = select(is_backfacing, (int_ior / ext_ior), (ext_ior / int_ior)); - fvec V[4], F[4], alpha[2]; - calc_alpha(roughness, fvec{0.0f}, regularize_alpha, alpha); - Sample_GGXRefraction_BSDF(surf.T, surf.B, surf.N, ray.d, alpha, eta, base_color, rand, V, F); + fvec V[4], F[4]; + const std::array, 2> alpha = calc_alpha(roughness, fvec{0.0f}, regularize_alpha); + Sample_GGXRefraction_BSDF(surf.T, surf.B, surf.N, ray.d, alpha.data(), eta, base_color, rand, V, F); where(mask, new_ray.depth) = pack_ray_type(RAY_TYPE_REFR); where(mask, new_ray.depth) |= @@ -6820,8 +6822,7 @@ Ray::NS::ivec Ray::NS::Evaluate_PrincipledNode( tangent_from_world(surf.T, surf.B, surf.N, ls.L, light_dir_ts); tangent_from_world(surf.T, surf.B, surf.N, H, sampled_normal_ts); - fvec spec_alpha[2]; - calc_alpha(spec.roughness, spec.anisotropy, regularize_alpha, spec_alpha); + const std::array, 2> spec_alpha = calc_alpha(spec.roughness, spec.anisotropy, regularize_alpha); const ivec eval_spec_lobe = simd_cast(lobe_weights.specular > 0.0f) & simd_cast(spec_alpha[0] * spec_alpha[1] >= 1e-7f) & _is_frontfacing & mask; if (eval_spec_lobe.not_all_zeros()) { @@ -6834,8 +6835,7 @@ Ray::NS::ivec Ray::NS::Evaluate_PrincipledNode( UNROLLED_FOR(i, 3, { where(eval_spec_lobe, lcol[i]) += safe_div_pos(ls.col[i] * spec_col[i], ls.pdf); }) } - fvec coat_alpha[2]; - calc_alpha(coat.roughness, fvec{0.0f}, regularize_alpha, coat_alpha); + const std::array, 2> coat_alpha = calc_alpha(coat.roughness, fvec{0.0f}, regularize_alpha); const ivec eval_coat_lobe = simd_cast(lobe_weights.clearcoat > 0.0f) & simd_cast(coat_alpha[0] * coat_alpha[1] >= 1e-7f) & _is_frontfacing & mask; if (eval_coat_lobe.not_all_zeros()) { @@ -6849,14 +6849,13 @@ Ray::NS::ivec Ray::NS::Evaluate_PrincipledNode( { where(eval_coat_lobe, lcol[i]) += safe_div_pos(0.25f * ls.col[i] * clearcoat_col[i], ls.pdf); }) } - fvec refr_spec_alpha[2]; - calc_alpha(spec.roughness, fvec{0.0f}, regularize_alpha, refr_spec_alpha); + const std::array, 2> refr_spec_alpha = calc_alpha(spec.roughness, fvec{0.0f}, regularize_alpha); const ivec eval_refr_spec_lobe = simd_cast(trans.fresnel != 0.0f) & simd_cast(lobe_weights.refraction > 0.0f) & simd_cast(refr_spec_alpha[0] * refr_spec_alpha[1] >= 1e-7f) & _is_frontfacing & mask; if (eval_refr_spec_lobe.not_all_zeros()) { fvec spec_col[4], spec_temp_col[3] = {1.0f, 1.0f, 1.0f}; - Evaluate_GGXSpecular_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, refr_spec_alpha, + Evaluate_GGXSpecular_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, refr_spec_alpha.data(), fvec{1.0f} /* ior */, fvec{0.0f} /* F0 */, spec_temp_col, spec_col_90, spec_col); where(eval_refr_spec_lobe, bsdf_pdf) += lobe_weights.refraction * trans.fresnel * spec_col[3]; @@ -6866,14 +6865,13 @@ Ray::NS::ivec Ray::NS::Evaluate_PrincipledNode( }) } - fvec refr_trans_alpha[2]; - calc_alpha(trans.roughness, fvec{0.0f}, regularize_alpha, refr_trans_alpha); + const std::array, 2> refr_trans_alpha = calc_alpha(trans.roughness, fvec{0.0f}, regularize_alpha); const ivec eval_refr_trans_lobe = simd_cast(trans.fresnel != 1.0f) & simd_cast(lobe_weights.refraction > 0.0f) & simd_cast(refr_trans_alpha[0] * refr_trans_alpha[1] >= 1e-7f) & _is_backfacing & mask; if (eval_refr_trans_lobe.not_all_zeros()) { fvec refr_col[4]; - Evaluate_GGXRefraction_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, refr_trans_alpha, trans.eta, + Evaluate_GGXRefraction_BSDF(view_dir_ts, sampled_normal_ts, light_dir_ts, refr_trans_alpha.data(), trans.eta, diff.base_color, refr_col); where(eval_refr_trans_lobe, bsdf_pdf) += lobe_weights.refraction * (1.0f - trans.fresnel) * refr_col[3]; @@ -6955,10 +6953,10 @@ void Ray::NS::Sample_PrincipledNode(const pass_settings_t &ps, const ray_data_t< if (sample_spec_lobe.not_all_zeros()) { const fvec spec_col_90[3] = {1.0f, 1.0f, 1.0f}; - fvec V[3], F[4], alpha[2]; - calc_alpha(spec.roughness, spec.anisotropy, regularize_alpha, alpha); - Sample_GGXSpecular_BSDF(surf.T, surf.B, surf.N, ray.d, alpha, spec.ior, spec.F0, spec.tmp_col, spec_col_90, - rand, V, F); + fvec V[3], F[4]; + const std::array, 2> alpha = calc_alpha(spec.roughness, spec.anisotropy, regularize_alpha); + Sample_GGXSpecular_BSDF(surf.T, surf.B, surf.N, ray.d, alpha.data(), spec.ior, spec.F0, spec.tmp_col, + spec_col_90, rand, V, F); F[3] *= lobe_weights.specular; fvec new_p[3]; @@ -6986,8 +6984,8 @@ void Ray::NS::Sample_PrincipledNode(const pass_settings_t &ps, const ray_data_t< simd_cast(mix_rand >= lobe_weights.diffuse + lobe_weights.specular) & simd_cast(mix_rand < lobe_weights.diffuse + lobe_weights.specular + lobe_weights.clearcoat) & mask; if (sample_coat_lobe.not_all_zeros()) { - fvec V[3], F[4], alpha[2]; - calc_alpha(coat.roughness, fvec{0.0f}, regularize_alpha, alpha); + fvec V[3], F[4]; + const std::array, 2> alpha = calc_alpha(coat.roughness, fvec{0.0f}, regularize_alpha); Sample_PrincipledClearcoat_BSDF(surf.T, surf.B, surf.N, ray.d, alpha[0], coat.ior, coat.F0, rand, V, F); F[3] *= lobe_weights.clearcoat; @@ -7024,9 +7022,9 @@ void Ray::NS::Sample_PrincipledNode(const pass_settings_t &ps, const ray_data_t< const ivec sample_trans_spec_lobe = simd_cast(mix_rand < trans.fresnel) & sample_trans_lobe; if (sample_trans_spec_lobe.not_all_zeros()) { - fvec _spec_tmp_col[3] = {1.0f, 1.0f, 1.0f}, alpha[2]; - calc_alpha(spec.roughness, fvec{0.0f}, regularize_alpha, alpha); - Sample_GGXSpecular_BSDF(surf.T, surf.B, surf.N, ray.d, alpha, fvec{1.0f} /* ior */, + const std::array, 2> alpha = calc_alpha(spec.roughness, fvec{0.0f}, regularize_alpha); + fvec _spec_tmp_col[3] = {1.0f, 1.0f, 1.0f}; + Sample_GGXSpecular_BSDF(surf.T, surf.B, surf.N, ray.d, alpha.data(), fvec{1.0f} /* ior */, fvec{0.0f} /* F0 */, _spec_tmp_col, _spec_tmp_col, rand, V, F); fvec new_p[3]; @@ -7042,10 +7040,10 @@ void Ray::NS::Sample_PrincipledNode(const pass_settings_t &ps, const ray_data_t< const ivec sample_trans_refr_lobe = ~sample_trans_spec_lobe & sample_trans_lobe; if (sample_trans_refr_lobe.not_all_zeros()) { - fvec temp_F[4], temp_V[4], alpha[2]; - calc_alpha(trans.roughness, fvec{0.0f}, regularize_alpha, alpha); - Sample_GGXRefraction_BSDF(surf.T, surf.B, surf.N, ray.d, alpha, trans.eta, diff.base_color, rand, temp_V, - temp_F); + fvec temp_F[4], temp_V[4]; + const std::array, 2> alpha = calc_alpha(trans.roughness, fvec{0.0f}, regularize_alpha); + Sample_GGXRefraction_BSDF(surf.T, surf.B, surf.N, ray.d, alpha.data(), trans.eta, diff.base_color, rand, + temp_V, temp_F); const fvec _plane_N[3] = {-surf.plane_N[0], -surf.plane_N[1], -surf.plane_N[2]}; fvec new_p[3]; @@ -7103,8 +7101,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con // offset of the sequence const auto rand_dim = uvec(RAND_DIM_BASE_COUNT + (total_depth + transp_depth) * RAND_DIM_BOUNCE_COUNT); - fvec tex_rand[2]; - get_scrambled_2d_rand(rand_dim + RAND_DIM_TEX, rand_hash, iteration - 1, rand_seq, tex_rand); + const std::array, 2> tex_rand = + get_scrambled_2d_rand(rand_dim + RAND_DIM_TEX, rand_hash, iteration - 1, rand_seq); const ivec ino_hit = simd_cast(inter.v < 0.0f); if (ino_hit.not_all_zeros()) { @@ -7117,7 +7115,7 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con #endif fvec{-1.0f}); Evaluate_EnvColor(ray, ino_hit, sc.env, *static_cast(textures[0]), pdf_factor, - tex_rand, env_col); + tex_rand.data(), env_col); if (cache_mode != eSpatialCacheMode::Update) { UNROLLED_FOR(i, 3, { env_col[i] = ray.c[i] * env_col[i]; }) } @@ -7144,7 +7142,7 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con if (is_light_hit.not_all_zeros()) { fvec light_col[3] = {}; Evaluate_LightColor(surf.P, ray, is_light_hit, inter, sc.env, sc.lights, uint32_t(sc.li_indices.size()), - *static_cast(textures[0]), tex_rand, light_col); + *static_cast(textures[0]), tex_rand.data(), light_col); if (cache_mode != eSpatialCacheMode::Update) { UNROLLED_FOR(i, 3, { light_col[i] = ray.c[i] * light_col[i]; }) } @@ -7276,8 +7274,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con gather(reinterpret_cast(&sc.materials[0].type), mat_index * sizeof(material_t) / sizeof(int)) & 0xff; - fvec mix_term_rand[2]; - get_scrambled_2d_rand(rand_dim + unsigned(RAND_DIM_BSDF_PICK), rand_hash, iteration - 1, rand_seq, mix_term_rand); + const std::array, 2> mix_term_rand = + get_scrambled_2d_rand(rand_dim + unsigned(RAND_DIM_BSDF_PICK), rand_hash, iteration - 1, rand_seq); fvec mix_rand = mix_term_rand[0]; fvec mix_weight = 1.0f; @@ -7312,7 +7310,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con const fvec base_lod = get_texture_lod(textures, first_t, lambda, ray_queue[index]); fvec tex_color[4] = {}; - SampleBilinear(textures, first_t, surf.uvs, ivec(base_lod), tex_rand, ray_queue[index], tex_color); + SampleBilinear(textures, first_t, surf.uvs, ivec(base_lod), tex_rand.data(), ray_queue[index], + tex_color); if (first_t & TEX_YCOCG_BIT) { YCoCg_to_RGB(tex_color, tex_color); } @@ -7384,7 +7383,7 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con ray_queue[num++] = diff_t; } - SampleBilinear(textures, first_t, surf.uvs, ivec{0}, tex_rand, ray_queue[index], normals_tex); + SampleBilinear(textures, first_t, surf.uvs, ivec{0}, tex_rand.data(), ray_queue[index], normals_tex); if (first_t & TEX_RECONSTRUCT_Z_BIT) { reconstruct_z |= ray_queue[index]; } @@ -7439,8 +7438,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con #endif if (cache_mode == eSpatialCacheMode::Query) { - fvec cache_rand[2]; - get_scrambled_2d_rand(rand_dim + unsigned(RAND_DIM_CACHE), rand_hash, iteration - 1, rand_seq, cache_rand); + const std::array, 2> cache_rand = + get_scrambled_2d_rand(rand_dim + unsigned(RAND_DIM_CACHE), rand_hash, iteration - 1, rand_seq); const uvec grid_level = calc_grid_level(surf.P, sc.spatial_cache_grid); const fvec voxel_size = calc_voxel_size(grid_level, sc.spatial_cache_grid); @@ -7489,14 +7488,13 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con #if USE_NEE light_sample_t ls; if (!sc.light_wnodes.empty()) { - fvec rand_pick_light[2]; - get_scrambled_2d_rand(rand_dim + RAND_DIM_LIGHT_PICK, rand_hash, iteration - 1, rand_seq, rand_pick_light); + const fvec rand_pick_light = + get_scrambled_2d_rand(rand_dim + RAND_DIM_LIGHT_PICK, rand_hash, iteration - 1, rand_seq)[0]; + const std::array, 2> rand_light_uv = + get_scrambled_2d_rand(rand_dim + RAND_DIM_LIGHT, rand_hash, iteration - 1, rand_seq); - fvec rand_light_uv[2]; - get_scrambled_2d_rand(rand_dim + RAND_DIM_LIGHT, rand_hash, iteration - 1, rand_seq, rand_light_uv); - - SampleLightSource(surf.P, surf.T, surf.B, surf.N, sc, textures, rand_pick_light[0], rand_light_uv, tex_rand, - is_active_lane, ls); + SampleLightSource(surf.P, surf.T, surf.B, surf.N, sc, textures, rand_pick_light, rand_light_uv.data(), + tex_rand.data(), is_active_lane, ls); } const fvec N_dot_L = dot3(surf.N, ls.L); #endif @@ -7530,7 +7528,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con const fvec base_lod = get_texture_lod(textures, first_t, lambda, ray_queue[index]); fvec tex_color[4] = {}; - SampleBilinear(textures, first_t, surf.uvs, ivec(base_lod), tex_rand, ray_queue[index], tex_color); + SampleBilinear(textures, first_t, surf.uvs, ivec(base_lod), tex_rand.data(), ray_queue[index], + tex_color); if (first_t & TEX_YCOCG_BIT) { YCoCg_to_RGB(tex_color, tex_color); } @@ -7591,7 +7590,7 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con const fvec roughness_lod = get_texture_lod(textures, first_t, lambda, ray_queue[index]); fvec roughness_color[4] = {}; - SampleBilinear(textures, first_t, surf.uvs, ivec(roughness_lod), tex_rand, ray_queue[index], + SampleBilinear(textures, first_t, surf.uvs, ivec(roughness_lod), tex_rand.data(), ray_queue[index], roughness_color); if (first_t & TEX_SRGB_BIT) { srgb_to_linear(roughness_color, roughness_color); @@ -7608,8 +7607,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con fvec col[3] = {0.0f, 0.0f, 0.0f}; - fvec rand_uv[2]; - get_scrambled_2d_rand(rand_dim + RAND_DIM_BSDF, rand_hash, iteration - 1, rand_seq, rand_uv); + const std::array, 2> rand_uv = + get_scrambled_2d_rand(rand_dim + RAND_DIM_BSDF, rand_hash, iteration - 1, rand_seq); ivec secondary_mask = {0}, shadow_mask = {0}; @@ -7691,7 +7690,7 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con (spec_depth < ps.max_spec_depth) & (total_depth < ps.max_total_depth) & ray_queue[index]; if (gen_ray.not_all_zeros()) { Sample_GlossyNode(ray, gen_ray, surf, base_color, roughness, regularize_alpha, fvec{spec_ior}, - fvec{spec_F0}, rand_uv, mix_weight, new_ray); + fvec{spec_F0}, rand_uv.data(), mix_weight, new_ray); assert((secondary_mask & gen_ray).all_zeros()); secondary_mask |= gen_ray; } @@ -7710,7 +7709,7 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con (refr_depth < ps.max_refr_depth) & (total_depth < ps.max_total_depth) & ray_queue[index]; if (gen_ray.not_all_zeros()) { Sample_RefractiveNode(ray, gen_ray, surf, base_color, roughness, regularize_alpha, is_backfacing, - fvec{mat->ior}, ext_ior, rand_uv, mix_weight, new_ray); + fvec{mat->ior}, ext_ior, rand_uv.data(), mix_weight, new_ray); assert((secondary_mask & gen_ray).all_zeros()); secondary_mask |= gen_ray; } @@ -7760,8 +7759,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con const uint32_t metallic_tex = mat->textures[METALLIC_TEXTURE]; const fvec metallic_lod = get_texture_lod(textures, metallic_tex, lambda, ray_queue[index]); fvec metallic_color[4] = {}; - SampleBilinear(textures, metallic_tex, surf.uvs, ivec(metallic_lod), tex_rand, ray_queue[index], - metallic_color); + SampleBilinear(textures, metallic_tex, surf.uvs, ivec(metallic_lod), tex_rand.data(), + ray_queue[index], metallic_color); metallic *= metallic_color[0]; } @@ -7771,8 +7770,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con const uint32_t specular_tex = mat->textures[SPECULAR_TEXTURE]; const fvec specular_lod = get_texture_lod(textures, specular_tex, lambda, ray_queue[index]); fvec specular_color[4] = {}; - SampleBilinear(textures, specular_tex, surf.uvs, ivec(specular_lod), tex_rand, ray_queue[index], - specular_color); + SampleBilinear(textures, specular_tex, surf.uvs, ivec(specular_lod), tex_rand.data(), + ray_queue[index], specular_color); if (specular_tex & TEX_SRGB_BIT) { srgb_to_linear(specular_color, specular_color); } @@ -7835,8 +7834,8 @@ void Ray::NS::ShadeSurface(const pass_settings_t &ps, const float limits[2], con } #endif Sample_PrincipledNode(ps, ray, ray_queue[index], surf, lobe_weights, diff, spec, coat, trans, metallic, - transmission, rand_uv, mix_rand, mix_weight, regularize_alpha, secondary_mask, - new_ray); + transmission, rand_uv.data(), mix_rand, mix_weight, regularize_alpha, + secondary_mask, new_ray); } /*else if (mat->type == TransparentNode) { assert(false); }*/ @@ -7966,34 +7965,38 @@ void Ray::NS::ShadeSecondary(const pass_settings_t &ps, const float clamp_direct const Cpu::TexStorageBase *const textures[], ray_data_t *out_secondary_rays, int *out_secondary_rays_count, shadow_ray_t *out_shadow_rays, int *out_shadow_rays_count, int img_w, color_rgba_t *out_color, - color_rgba_t *out_base_color, color_rgba_t *out_depth_normal) { + color_rgba_t *out_base_color, color_rgba_t *out_depth_normals) { const float limits[2] = {(clamp_direct != 0.0f) ? 3.0f * clamp_direct : FLT_MAX, (ps.clamp_indirect != 0.0f) ? 3.0f * ps.clamp_indirect : FLT_MAX}; for (int i = 0; i < inters.size(); ++i) { const ray_data_t &r = rays[i]; const hit_data_t &inter = inters[i]; - fvec col[4] = {0.0f}; + fvec col[4] = {}, base_color[3] = {}, depth_normal[4] = {}; Ray::NS::ShadeSurface(ps, limits, cache_mode, rand_seq, rand_seed, iteration, inter, r, sc, textures, col, out_secondary_rays, out_secondary_rays_count, out_shadow_rays, out_shadow_rays_count, - (fvec *)nullptr, (fvec *)nullptr); + base_color, depth_normal); const uvec x = r.xy >> 16, y = r.xy & 0x0000FFFF; // TODO: match layouts! - UNROLLED_FOR_S(j, S, { - if (r.mask.template get()) { + for (int j = 0; j < S; ++j) { + if (r.mask[j]) { if (cache_mode != eSpatialCacheMode::Update) { - auto old_val = - fvec4(out_color[y.template get() * img_w + x.template get()].v, vector_aligned); - old_val += - fvec4(col[0].template get(), col[1].template get(), col[2].template get(), 0.0f); - old_val.store_to(out_color[y.template get() * img_w + x.template get()].v, vector_aligned); + auto old_val = fvec4(out_color[y[j] * img_w + x[j]].v, vector_aligned); + old_val += fvec4(col[0][j], col[1][j], col[2][j], 0.0f); + old_val.store_to(out_color[y[j] * img_w + x[j]].v, vector_aligned); } else { UNROLLED_FOR(k, 4, { out_color[y[j] * img_w + x[j]].v[k] = col[k][j]; }) } + if (out_base_color) { + UNROLLED_FOR(k, 3, { out_base_color[y[j] * img_w + x[j]].v[k] = base_color[k][j]; }) + } + if (out_depth_normals) { + UNROLLED_FOR(k, 4, { out_depth_normals[y[j] * img_w + x[j]].v[k] = depth_normal[k][j]; }) + } } - }) + } } }