cuteday · cuteday · Apr 15, 2024 · Apr 10, 2024 · Apr 10, 2024 · Apr 12, 2024
diff --git a/.gitmodules b/.gitmodules
@@ -1,23 +1,23 @@
 [submodule "src/ext/assimp"]
 	path = src/ext/assimp
-	url = git@github.com:assimp/assimp.git
+	url = https://github.com/assimp/assimp.git
 [submodule "src/ext/glfw"]
 	path = src/ext/glfw
-	url = git@github.com:glfw/glfw.git
+	url = https://github.com/glfw/glfw.git
 [submodule "src/ext/pybind11"]
 	path = src/ext/pybind11
-	url = git@github.com:pybind/pybind11.git
+	url = https://github.com/pybind/pybind11.git
 	branch = stable
 [submodule "src/ext/pbrtparser"]
 	path = src/ext/pbrtparser
-	url = git@github.com:cuteday/pbrt-parser.git
+	url = https://github.com/cuteday/pbrt-parser.git
 	branch = cute
 [submodule "src/core/math/3rdparty/eigen"]
 	path = src/core/math/3rdparty/eigen
-	url = git@github.com:cuteday/Eigen.git
+	url = https://github.com/cuteday/Eigen.git
 [submodule "src/ext/nvrhi"]
 	path = src/ext/nvrhi
-	url = git@github.com:cuteday/nvrhi.git
+	url = https://github.com/cuteday/nvrhi.git
 [submodule "src/ext/openvdb"]
 	path = src/ext/openvdb
-	url = git@github.com:cuteday/openvdb_win.git
+	url = https://github.com/cuteday/openvdb_win.git
diff --git a/src/core/device/soa.h b/src/core/device/soa.h
@@ -0,0 +1,61 @@
+#pragma once
+#include "common.h"
+#include <type_traits>
+
+NAMESPACE_BEGIN(krr)
+
+// specifications for SOA::GetSetIndirector
+template <typename T> 
+class SOA {
+public:
+	struct GetSetIndirector { 
+		GetSetIndirector() = default; 
+		KRR_CALLABLE operator T() const;
+		KRR_CALLABLE void operator=(const T &val);
+		KRR_CALLABLE void operator=(const GetSetIndirector &other);
+		SOA<T> *soa; int i; 
+	};
+};
+
+// https://en.cppreference.com/w/cpp/named_req/RandomAccessIterator
+template <typename T> 
+class SOAIterator {
+public:
+	using difference_type	= int;
+	using value_type		= T;
+	using reference			= typename SOA<T>::GetSetIndirector;
+	using pointer			= void;
+	using iterator_category = std::random_access_iterator_tag;
+
+	KRR_CALLABLE SOAIterator() : m_soa(nullptr), m_index(0) {}
+	KRR_CALLABLE SOAIterator(SOA<T> *soa, int index) : m_soa(soa), m_index(index) {}
+	KRR_CALLABLE SOAIterator(const SOA<T> *soa, int index) : m_soa(const_cast<SOA<T>*>(soa)), m_index(index) {}
+
+	KRR_CALLABLE SOAIterator& operator +=(int n) { m_index += n; return *this; }
+	KRR_CALLABLE SOAIterator& operator -=(int n) { m_index -= n; return *this; }
+	KRR_CALLABLE SOAIterator& operator ++() { ++m_index; return *this; }
+	KRR_CALLABLE SOAIterator& operator --() { --m_index; return *this; }
+	KRR_CALLABLE SOAIterator operator ++(int) { SOAIterator it = *this; ++m_index; return it; }
+	KRR_CALLABLE SOAIterator operator --(int) { SOAIterator it = *this; --m_index; return it; }
+	KRR_CALLABLE SOAIterator operator+(difference_type n) const { return SOAIterator(m_soa, m_index + n); }
+	KRR_CALLABLE SOAIterator operator-(difference_type n) const { return SOAIterator(m_soa, m_index - n); }
+	KRR_CALLABLE difference_type operator-(const SOAIterator& it) const { return m_index - it.m_index; }
+	KRR_CALLABLE friend SOAIterator operator+(difference_type n, const SOAIterator& it) { return it + n; }
+	KRR_CALLABLE friend SOAIterator operator-(difference_type n, const SOAIterator &it) { return it - n; }
+
+	KRR_CALLABLE bool operator==(const SOAIterator& it) const { return m_index == it.m_index; }
+	KRR_CALLABLE bool operator!=(const SOAIterator &it) const { return m_index != it.m_index; }
+	KRR_CALLABLE bool operator<(const SOAIterator &it) const { return m_index < it.m_index; }
+	KRR_CALLABLE bool operator<=(const SOAIterator &it) const { return m_index <= it.m_index; }
+	KRR_CALLABLE bool operator>(const SOAIterator &it) const { return m_index > it.m_index; }
+	KRR_CALLABLE bool operator>=(const SOAIterator &it) const { return m_index >= it.m_index; }
+
+	KRR_CALLABLE reference operator*() { return {m_soa, m_index}; }
+	KRR_CALLABLE reference operator[](difference_type n) { return {m_soa, m_index + n}; }
+
+private:
+	std::conditional_t<std::is_const_v<T>, const SOA<T>*, SOA<T>*> m_soa;
+	difference_type m_index;
+};
+
+NAMESPACE_END(krr)
diff --git a/src/core/light.h b/src/core/light.h
@@ -240,7 +240,7 @@ class InfiniteLight {
 	}
 
 	KRR_DEVICE Spectrum Li(Vector3f wi, const SampledWavelengths &lambda) const {
-		Vector2f uv = worldToLatLong(rotation.transpose() * wi);
+		Vector2f uv = utils::worldToLatLong(rotation.transpose() * wi);
 		RGB L		= image.isValid() ? tint * image.evaluate(uv).head<3>() : tint;
 		return scale * Spectrum::fromRGB(L, SpectrumType::RGBIlluminant, lambda, *colorSpace);
 	}

diff --git a/src/core/renderpass.cpp b/src/core/renderpass.cpp
@@ -11,6 +11,10 @@ RenderTexture::RenderTexture(vkrhi::IDevice *device, vkrhi::TextureHandle textur
 	mCudaSurface = cudaHandler->mapVulkanTextureToCudaSurface(mTexture, cudaArrayColorAttachment);
 }
 
+RenderTexture::~RenderTexture() { 
+	CUDA_CHECK(cudaDestroySurfaceObject(mCudaSurface)); 
+}
+
 vkrhi::TextureDesc RenderTexture::getVulkanDesc(const Vector2i size, vkrhi::Format format,
 										const std::string name) {
 	vkrhi::TextureDesc textureDesc;

diff --git a/src/core/renderpass.h b/src/core/renderpass.h
@@ -22,7 +22,7 @@ class RenderTexture {
 	using SharedPtr = std::shared_ptr<RenderTexture>;
 
 	RenderTexture(vkrhi::IDevice *device, vkrhi::TextureHandle texture);
-	~RenderTexture() = default;
+	~RenderTexture();
 
 	static RenderTexture::SharedPtr create(vkrhi::IDevice *device, 
 		const Vector2i size, vkrhi::Format format, const std::string name = "");

diff --git a/src/core/window.cpp b/src/core/window.cpp
@@ -497,7 +497,11 @@ void DeviceManager::getFrameSize(int &width, int &height) const {
 void DeviceManager::updateWindowSize() {
 	if (mWindow == nullptr) return;
 	int width, height;
-	glfwGetWindowSize(mWindow, &width, &height);
+	// as the validation layer needs us to get the window extent through this way.
+	// https://www.reddit.com/r/vulkan/comments/14uen19/do_i_need_to_recreate_glfw_surfaces_on_window/
+	auto surfaceCap = mVulkanPhysicalDevice.getSurfaceCapabilitiesKHR(mWindowSurface);
+	width			= surfaceCap.currentExtent.width;
+	height			= surfaceCap.currentExtent.height;
 
 	if (width == 0 || height == 0) {
 		// window is minimized
@@ -1075,6 +1079,9 @@ bool DeviceManager::createSwapChain() {
 
 	const bool enableSwapChainSharing = queues.size() > 1;
 
+	auto nextDesc = vk::SwapchainPresentScalingCreateInfoEXT().setScalingBehavior(
+		vk::PresentScalingFlagBitsEXT::eOneToOne);
+
 	auto desc =
 		vk::SwapchainCreateInfoKHR()
 			.setSurface(mWindowSurface)
@@ -1094,7 +1101,8 @@ bool DeviceManager::createSwapChain() {
 			.setPresentMode(mDeviceParams.vsyncEnabled ? vk::PresentModeKHR::eFifo
 														: vk::PresentModeKHR::eImmediate)
 			.setClipped(true)
-			.setOldSwapchain(nullptr);
+			.setOldSwapchain(nullptr)
+			.setPNext(&nextDesc);
 
 	const vk::Result res = mVulkanDevice.createSwapchainKHR(&desc, nullptr, &mSwapChain);
 	if (res != vk::Result::eSuccess) {

diff --git a/src/render/sampling.h b/src/render/sampling.h
@@ -9,8 +9,6 @@
 
 NAMESPACE_BEGIN(krr)
 
-using namespace utils;
-
 KRR_CALLABLE float evalMIS(float p0, float p1) {
 #if MIS_POWER_HEURISTIC
 	return p0 * p0 / (p0 * p0 + p1 * p1);
@@ -94,7 +92,7 @@ KRR_CALLABLE int sampleDiscrete(gpu::span<const float> weights, float u, float *
 	for (float w : weights) sumWeights += w;
 
 	float up = u * sumWeights;
-	if (up == sumWeights) up = nextFloatDown(up);
+	if (up == sumWeights) up = utils::nextFloatDown(up);
 
 	int offset = 0;
 	float sum  = 0;

diff --git a/src/render/shared.h b/src/render/shared.h
@@ -21,7 +21,6 @@
 
 NAMESPACE_BEGIN(krr)
 
-using namespace utils;
 using namespace rt;
 
 struct HitInfo {

diff --git a/src/render/spectrum.h b/src/render/spectrum.h
@@ -467,9 +467,7 @@ KRR_CALLABLE float RGBColorSpace::lum(const SpectrumType& s, const SampledWavele
 		SampledSpectrum pdf = lambda.pdf();
 		return SampledSpectrum(Ys * s).safeDiv(pdf).mean() / CIE_Y_integral;
 	}
-	else 
-		static_assert(!std::is_same_v<SpectrumType, SpectrumType>, 
-			"SpectrumType must be either RGB or SampledSpectrum");
+	else static_assert(false, "SpectrumType must be either RGB or SampledSpectrum");
 }
 
 KRR_CALLABLE float luminance(RGB color) {

diff --git a/src/render/wavefront/workitem.h b/src/render/wavefront/workitem.h
@@ -1,6 +1,7 @@
 #pragma once
 #include "common.h"
 #include "raytracing.h"
+#include "device/soa.h"
 #include "render/shared.h"
 #include "render/spectrum.h"
 #include "render/materials/bxdf.h"
@@ -95,6 +96,7 @@ struct MediumScatterWorkItem {
 
 #pragma warning (push, 0)
 #pragma warning (disable: ALL_CODE_ANALYSIS_WARNINGS)
+// the following include file is generated by scripts (soac.cpp)
 #include "render/wavefront/workitem_soa.h"
 #pragma warning (pop)
 

diff --git a/src/render/wavefront/workqueue.h b/src/render/wavefront/workqueue.h
@@ -1,7 +1,9 @@
 #pragma once
 #include "common.h"
 #include <atomic>
-
+#ifdef __NVCC__
+#include <thrust/sort.h>
+#endif
 
 #include "device/cuda.h"
 #include "device/atomic.h"
@@ -26,6 +28,12 @@ class PixelStateBuffer : public SOA<PixelState> {
 template <typename WorkItem>
 class WorkQueue : public SOA<WorkItem> {
 public:
+	using value_type			 = WorkItem;
+	using iterator				 = typename SOA<WorkItem>::iterator;
+	using const_iterator		 = typename SOA<WorkItem>::const_iterator;
+	using reverse_iterator		 = typename SOA<WorkItem>::reverse_iterator;
+	using const_reverse_iterator = typename SOA<WorkItem>::const_reverse_iterator;
+
 	WorkQueue() = default;
 	KRR_HOST WorkQueue(int n, Allocator alloc) : SOA<WorkItem>(n, alloc) {}
 	KRR_HOST WorkQueue& operator=(const WorkQueue& w) {
@@ -34,12 +42,14 @@ class WorkQueue : public SOA<WorkItem> {
 		return *this;
 	}
 
-	KRR_CALLABLE int size() const {
-		return m_size.load();
-	}
-	KRR_CALLABLE void reset() {
-		m_size.store(0);
-	}
+	KRR_CALLABLE iterator begin() { return iterator(this, 0); }
+	KRR_CALLABLE const_iterator begin() const { return const_iterator(this, 0); }
+	KRR_CALLABLE iterator end() { return iterator(this, m_size.load()); }
+	KRR_CALLABLE const_iterator end() const { return const_iterator(this, m_size.load()); }
+
+	KRR_CALLABLE int size() const { return m_size.load(); }
+	KRR_CALLABLE int capacity() const { return nAlloc; }
+	KRR_CALLABLE void reset() { m_size.store(0); }
 
 	KRR_CALLABLE int push(const WorkItem& w) {
 		int index = allocateEntry();
@@ -59,10 +69,53 @@ class WorkQueue : public SOA<WorkItem> {
 		return m_size.fetch_add(1);
 	}
 
-private:
 	atomic<int> m_size{ 0 };
 };
 
+template <typename WorkItem, typename Key> 
+class SortableWorkQueue : public WorkQueue<WorkItem> {
+public:
+	using KeyType = Key;
+	SortableWorkQueue() = default;
+	KRR_HOST SortableWorkQueue(int n, Allocator alloc) 
+		: WorkQueue<WorkItem>(n, alloc) {
+		m_keys = TypedBuffer<Key>(n);
+	}
+
+	KRR_HOST SortableWorkQueue &operator=(const SortableWorkQueue &w) {
+		WorkQueue<WorkItem>::operator=(w);
+		m_keys = w.m_keys;
+		return *this;
+	}
+
+	KRR_CALLABLE TypedBuffer<Key>& keys() { return m_keys; }
+
+	template <typename F> 
+	void updateKeys(F mapping, size_t max_elements, const Key& oob_val, CUstream stream) {
+		auto* queue = this;
+		GPUParallelFor(max_elements, [=] KRR_DEVICE (int index) {
+				if (index >= queue->size()) queue->keys()[index] = oob_val;
+				else queue->keys()[index] = mapping(queue->operator[](index));
+			}, stream);
+	}
+
+	template <typename Compare> 
+	void sort(Compare comp, size_t max_elements, CUstream stream) {
+#ifdef __NVCC__
+		thrust::sort_by_key(thrust::device.on(stream), m_keys.data(),
+							m_keys.data() + max_elements, this->begin(), comp);
+#endif
+	}
+
+	void resize(int n, Allocator alloc) {
+		WorkQueue<WorkItem>::resize(n, alloc);
+		m_keys.resize(n);
+	}
+
+protected:
+	TypedBuffer<Key> m_keys;
+};
+
 template <typename T> class MultiWorkQueue;
 
 template <typename... Ts> 
@@ -102,15 +155,22 @@ class MultiWorkQueue<TypePack<Ts...>> {
 };
 
 // Helper functions and basic classes
+
 template <typename F, typename WorkItem>
-void ForAllQueued(const WorkQueue<WorkItem>* q, int nElements,
-	F&& func, CUstream stream = 0) {
-	GPUParallelFor(nElements, [=] KRR_DEVICE(int index) mutable {
-		if (index >= q->size())
-			return;
-		func((*q)[index]);
-	}, stream);
+void ForAllQueued(const WorkQueue<WorkItem> *q, int nElements, F &&func, CUstream stream = 0);
+
+#ifdef __NVCC__
+template <typename F, typename WorkItem>
+void ForAllQueued(const WorkQueue<WorkItem> *q, int nElements, F &&func, CUstream stream) {
+	GPUParallelFor(
+		nElements,
+		[=] KRR_DEVICE(int index) mutable {
+			if (index >= q->size()) return;
+			func((*q)[index]);
+		},
+		stream);
 }
+#endif
 
 class RayQueue : public WorkQueue<RayWorkItem> {
 public:
@@ -298,5 +358,4 @@ class MediumScatterQueue : public WorkQueue<MediumScatterWorkItem> {
 	}
 };
 
-
 NAMESPACE_END(krr)