Skip to content

Commit

Permalink
Refactor simd classes
Browse files Browse the repository at this point in the history
 (match closer to std::simd proposal)
  • Loading branch information
sergcpp committed Mar 11, 2024
1 parent f59d02a commit e68d1bb
Show file tree
Hide file tree
Showing 38 changed files with 7,834 additions and 7,600 deletions.
10 changes: 5 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,11 +204,11 @@ set(SOURCE_FILES Bitmask.h
set(SIMD_FILES internal/simd/aligned_allocator.h
internal/simd/detect.h
internal/simd/detect.cpp
internal/simd/simd_vec.h
internal/simd/simd_vec_sse.h
internal/simd/simd_vec_avx.h
internal/simd/simd_vec_avx512.h
internal/simd/simd_vec_neon.h)
internal/simd/simd.h
internal/simd/simd_sse.h
internal/simd/simd_avx.h
internal/simd/simd_avx512.h
internal/simd/simd_neon.h)

set(VK_SOURCE_FILES internal/Vk/AccStructureVK.h
internal/Vk/AccStructureVK.cpp
Expand Down
528 changes: 264 additions & 264 deletions internal/Atmosphere.cpp

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions internal/Atmosphere.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,27 @@ force_inline float from_sub_uvs_to_unit(float u, float resolution) {
return (u - 0.5f / resolution) * (resolution / (resolution - 1.0f));
}

Ref::simd_fvec4 IntegrateOpticalDepth(const atmosphere_params_t &params, const Ref::simd_fvec4 &ray_start,
const Ref::simd_fvec4 &ray_dir);
Ref::fvec4 IntegrateOpticalDepth(const atmosphere_params_t &params, const Ref::fvec4 &ray_start,
const Ref::fvec4 &ray_dir);

template <bool ExpSampleDistribution = true, bool UniformPhase = false>
std::pair<Ref::simd_fvec4, Ref::simd_fvec4>
IntegrateScatteringMain(const atmosphere_params_t &params, const Ref::simd_fvec4 &ray_start,
const Ref::simd_fvec4 &ray_dir, float ray_length, const Ref::simd_fvec4 &light_dir,
const Ref::simd_fvec4 &moon_dir, const Ref::simd_fvec4 &light_color,
std::pair<Ref::fvec4, Ref::fvec4>
IntegrateScatteringMain(const atmosphere_params_t &params, const Ref::fvec4 &ray_start,
const Ref::fvec4 &ray_dir, float ray_length, const Ref::fvec4 &light_dir,
const Ref::fvec4 &moon_dir, const Ref::fvec4 &light_color,
Span<const float> transmittance_lut, Span<const float> multiscatter_lut, float rand_offset,
int sample_count, Ref::simd_fvec4 &inout_transmittance);
int sample_count, Ref::fvec4 &inout_transmittance);

Ref::simd_fvec4 IntegrateScattering(const atmosphere_params_t &params, Ref::simd_fvec4 ray_start,
const Ref::simd_fvec4 &ray_dir, float ray_length, const Ref::simd_fvec4 &light_dir,
float light_angle, const Ref::simd_fvec4 &light_color,
Ref::fvec4 IntegrateScattering(const atmosphere_params_t &params, Ref::fvec4 ray_start,
const Ref::fvec4 &ray_dir, float ray_length, const Ref::fvec4 &light_dir,
float light_angle, const Ref::fvec4 &light_color,
Span<const float> transmittance_lut, Span<const float> multiscatter_lut,
uint32_t rand_hash);

// Transmittance LUT function parameterisation from Bruneton 2017
// https://github.com/ebruneton/precomputed_atmospheric_scattering
void UvToLutTransmittanceParams(const atmosphere_params_t &params, Ref::simd_fvec2 uv, float &view_height,
void UvToLutTransmittanceParams(const atmosphere_params_t &params, Ref::fvec2 uv, float &view_height,
float &view_zenith_cos_angle);
Ref::simd_fvec2 LutTransmittanceParamsToUv(const atmosphere_params_t &params, float view_height,
Ref::fvec2 LutTransmittanceParamsToUv(const atmosphere_params_t &params, float view_height,
float view_zenith_cos_angle);
} // namespace Ray
42 changes: 21 additions & 21 deletions internal/BVHSplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,27 @@ const float SpatialSplitAlpha = 0.00001f;
const int NumSpatialSplitBins = 256;

// Bounding box stored as a `min` corner and a `max` corner vector.
// NOTE(review): this listing is a rendered diff, so every renamed line shows up twice -
// the `simd_fvec4` spelling immediately followed by its `fvec4` replacement.
struct bbox_t {
// Corners default to FLT_MAX / -FLT_MAX, i.e. min > max; presumably this marks an
// "empty" box that the first min()/max() accumulation overwrites - TODO confirm how
// the single-value initializer fills the remaining lanes.
Ref::simd_fvec4 min = {FLT_MAX}, max = {-FLT_MAX};
Ref::fvec4 min = {FLT_MAX}, max = {-FLT_MAX};
bbox_t() = default;
// Builds a box directly from the two given corners (no validation that _min <= _max).
bbox_t(const Ref::simd_fvec4 &_min, const Ref::simd_fvec4 &_max) : min(_min), max(_max) {}
bbox_t(const Ref::fvec4 &_min, const Ref::fvec4 &_max) : min(_min), max(_max) {}

// Convenience overload: evaluates the static version on this box's own corners.
float surface_area() const { return surface_area(min, max); }

// Returns twice the sum of the extents (max - min) in lanes 0, 1 and 2; lane 3 is not read.
// Note this is a sum of edge lengths, not a sum of face areas - the product-based
// formula is kept below, commented out.
static float surface_area(const Ref::simd_fvec4 &min, const Ref::simd_fvec4 &max) {
const Ref::simd_fvec4 e = max - min;
static float surface_area(const Ref::fvec4 &min, const Ref::fvec4 &max) {
const Ref::fvec4 e = max - min;
return 2 * (e.get<0>() + e.get<1>() + e.get<2>());
// return e[0] * e[1] + e[0] * e[2] + e[1] * e[2];
}
};

// Sutherland-Hodgman polygon clipping, adapted from the Mitsuba renderer
static int sutherland_hodgman(const Ref::simd_dvec3 *input, const int in_count, Ref::simd_dvec3 *output, const int axis,
static int sutherland_hodgman(const Ref::dvec3 *input, const int in_count, Ref::dvec3 *output, const int axis,
const double split_pos, const bool is_minimum) {
if (in_count < 3) {
return 0;
}

Ref::simd_dvec3 cur = input[0];
Ref::dvec3 cur = input[0];
const double sign = is_minimum ? 1.0 : -1.0;
double distance = sign * (cur[axis] - split_pos);
bool cur_is_inside = (distance >= 0);
Expand All @@ -46,7 +46,7 @@ static int sutherland_hodgman(const Ref::simd_dvec3 *input, const int in_count,
if (nextIdx == in_count) {
nextIdx = 0;
}
const Ref::simd_dvec3 &next = input[nextIdx];
const Ref::dvec3 &next = input[nextIdx];
distance = sign * (next[axis] - split_pos);
bool next_is_inside = (distance >= 0);

Expand All @@ -56,13 +56,13 @@ static int sutherland_hodgman(const Ref::simd_dvec3 *input, const int in_count,
} else if (cur_is_inside && !next_is_inside) {
// Going outside -- add the intersection
double t = (split_pos - cur[axis]) / (next[axis] - cur[axis]);
Ref::simd_dvec3 p = cur + (next - cur) * t;
Ref::dvec3 p = cur + (next - cur) * t;
p.set(axis, split_pos); // Avoid roundoff errors
output[out_count++] = p;
} else if (!cur_is_inside && next_is_inside) {
// Coming back inside -- add the intersection + next vertex
double t = (split_pos - cur[axis]) / (next[axis] - cur[axis]);
Ref::simd_dvec3 &p = output[out_count++];
Ref::dvec3 &p = output[out_count++];
p = cur + (next - cur) * t;
p.set(axis, split_pos); // Avoid roundoff errors
output[out_count++] = next;
Expand Down Expand Up @@ -105,9 +105,9 @@ force_inline float castflt_up(const double val) {
return a;
}

bbox_t GetClippedAABB(const Ref::simd_fvec3 &_v0, const Ref::simd_fvec3 &_v1, const Ref::simd_fvec3 &_v2,
bbox_t GetClippedAABB(const Ref::fvec3 &_v0, const Ref::fvec3 &_v1, const Ref::fvec3 &_v2,
const bbox_t &limits) {
Ref::simd_dvec3 vertices1[9], vertices2[9];
Ref::dvec3 vertices1[9], vertices2[9];
int vertex_count = 3;

vertices1[0] = {double(_v0[0]), double(_v0[1]), double(_v0[2])};
Expand All @@ -134,9 +134,9 @@ bbox_t GetClippedAABB(const Ref::simd_fvec3 &_v0, const Ref::simd_fvec3 &_v1, co
} // namespace Ray

Ray::split_data_t Ray::SplitPrimitives_SAH(const prim_t *primitives, Span<const uint32_t> prim_indices,
const vtx_attribute_t &positions, const Ref::simd_fvec4 &bbox_min,
const Ref::simd_fvec4 &bbox_max, const Ref::simd_fvec4 &root_min,
const Ref::simd_fvec4 &root_max, const bvh_settings_t &s) {
const vtx_attribute_t &positions, const Ref::fvec4 &bbox_min,
const Ref::fvec4 &bbox_max, const Ref::fvec4 &root_min,
const Ref::fvec4 &root_max, const bvh_settings_t &s) {
const int num_prims = int(prim_indices.size());
const bbox_t whole_box = {bbox_min, bbox_max};

Expand All @@ -148,9 +148,9 @@ Ray::split_data_t Ray::SplitPrimitives_SAH(const prim_t *primitives, Span<const
for (int i = 0; i < num_prims; i++) {
const prim_t &p = primitives[prim_indices[i]];

const auto v0 = Ref::simd_fvec3{&positions.data[positions.offset + p.i0 * positions.stride]},
v1 = Ref::simd_fvec3{&positions.data[positions.offset + p.i1 * positions.stride]},
v2 = Ref::simd_fvec3{&positions.data[positions.offset + p.i2 * positions.stride]};
const auto v0 = Ref::fvec3{&positions.data[positions.offset + p.i0 * positions.stride]},
v1 = Ref::fvec3{&positions.data[positions.offset + p.i1 * positions.stride]},
v2 = Ref::fvec3{&positions.data[positions.offset + p.i2 * positions.stride]};

modified_prim_bounds[i] = GetClippedAABB(v0, v1, v2, whole_box);
}
Expand Down Expand Up @@ -314,7 +314,7 @@ Ray::split_data_t Ray::SplitPrimitives_SAH(const prim_t *primitives, Span<const

const bbox_t overlap = {max(res_left_bounds.min, res_right_bounds.min),
min(res_left_bounds.max, res_right_bounds.max)};
Ref::simd_ivec4 test = simd_cast(overlap.max <= overlap.min);
Ref::ivec4 test = simd_cast(overlap.max <= overlap.min);
test.set<3>(0);

if (s.allow_spatial_splits && test.all_zeros() &&
Expand Down Expand Up @@ -379,9 +379,9 @@ Ray::split_data_t Ray::SplitPrimitives_SAH(const prim_t *primitives, Span<const
bins[exit_index].exit_counter++;

if (!positions.data.empty()) {
auto v0 = Ref::simd_fvec3{&positions.data[positions.offset + p.i0 * positions.stride]},
v1 = Ref::simd_fvec3{&positions.data[positions.offset + p.i1 * positions.stride]},
v2 = Ref::simd_fvec3{&positions.data[positions.offset + p.i2 * positions.stride]};
auto v0 = Ref::fvec3{&positions.data[positions.offset + p.i0 * positions.stride]},
v1 = Ref::fvec3{&positions.data[positions.offset + p.i1 * positions.stride]},
v2 = Ref::fvec3{&positions.data[positions.offset + p.i2 * positions.stride]};

for (int j = enter_index; j <= exit_index; j++) {
bbox_t box = GetClippedAABB(v0, v1, v2, bins[j].limits);
Expand Down
10 changes: 5 additions & 5 deletions internal/BVHSplit.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
namespace Ray {
// Per-primitive record passed to SplitPrimitives_SAH.
struct prim_t {
// Three vertex indices; BVHSplit.cpp multiplies each by `positions.stride` (plus
// `positions.offset`) to locate the corresponding vertex position.
uint32_t i0, i1, i2;
// Two corner vectors - presumably the bounds of this primitive; verify against the code
// that fills prim_t.
Ref::simd_fvec4 bbox_min, bbox_max;
Ref::fvec4 bbox_min, bbox_max;
};

// Value returned by SplitPrimitives_SAH: one index list and one pair of bounds vectors
// for each of the two sides of the split.
struct split_data_t {
// Indices assigned to the left and right side respectively (presumably values taken
// from, or derived from, the `prim_indices` argument - TODO confirm).
std::vector<uint32_t> left_indices, right_indices;
// Two vectors per side; presumably [0] = min corner and [1] = max corner - TODO confirm.
Ref::simd_fvec4 left_bounds[2], right_bounds[2];
Ref::fvec4 left_bounds[2], right_bounds[2];
};

// Splits the primitives referenced by `prim_indices` (indices into `primitives`) into a
// left and a right set and returns them as split_data_t.
// `bbox_min`/`bbox_max` are combined into the box of the whole set being split;
// `positions` supplies vertex data used when clipping triangles; `s` carries the build
// settings (e.g. `allow_spatial_splits`).
// NOTE(review): "SAH" presumably stands for surface area heuristic, and the role of
// `root_min`/`root_max` is not visible here - confirm against the definition in BVHSplit.cpp.
split_data_t SplitPrimitives_SAH(const prim_t *primitives, Span<const uint32_t> prim_indices,
const vtx_attribute_t &positions, const Ref::simd_fvec4 &bbox_min,
const Ref::simd_fvec4 &bbox_max, const Ref::simd_fvec4 &root_min,
const Ref::simd_fvec4 &root_max, const bvh_settings_t &s);
const vtx_attribute_t &positions, const Ref::fvec4 &bbox_min,
const Ref::fvec4 &bbox_max, const Ref::fvec4 &root_min,
const Ref::fvec4 &root_max, const bvh_settings_t &s);

} // namespace Ray
Loading

0 comments on commit e68d1bb

Please sign in to comment.