From 6c386477eab18168976d4ba2d81113e98c9a40c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=9E=8D=E9=9B=AA=E6=97=B6=E5=88=86?= Date: Sun, 4 Aug 2024 17:55:47 +0800 Subject: [PATCH] Bug fixes (#63) * make SOAs self-contained * temprarily fix a hardcoded routine not exec in host code * revert the temporary fix for cuda 12.5 * temporarily disable NVTX to workaround cuda 12.6 the undefined identifier MemoryBarrier issue. * Update README.md --- .github/workflows/auto-merge.yml | 20 +++++++++ .github/workflows/dependabot.yml | 14 ++++++ README.md | 6 ++- common/build/source.cmake | 1 + src/core/common.h | 1 + src/misc/render/ppg/integrator.cpp | 4 +- src/render/passes/errormeasure/metrics.cu | 18 +++----- src/render/wavefront/integrator.cpp | 16 +++---- src/util/soac.cpp | 53 +++++++++++++++++------ 9 files changed, 95 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/auto-merge.yml create mode 100644 .github/workflows/dependabot.yml diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml new file mode 100644 index 0000000..13435ec --- /dev/null +++ b/.github/workflows/auto-merge.yml @@ -0,0 +1,20 @@ +name: Auto-merge PRs + +on: + pull_request: + branches: [ hotfix ] + +permissions: + pull-requests: write + contents: write + +jobs: + automerge: + runs-on: ubuntu-latest + if: github.actor == 'cuteday' + steps: + - uses: peter-evans/enable-pull-request-automerge@v3 + with: + token: ${{ secrets.DEPENDABOT_TOKEN }} + pull-request-number: ${{ github.event.pull_request.number }} + merge-method: squash \ No newline at end of file diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml new file mode 100644 index 0000000..b135516 --- /dev/null +++ b/.github/workflows/dependabot.yml @@ -0,0 +1,14 @@ +name: auto-merge + +on: + pull_request: + +jobs: + auto-merge: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ahmadnassri/action-dependabot-auto-merge@v2 + with: + target: minor + github-token: ${{ secrets.DEPENDABOT_TOKEN }} \ No newline at end of file diff --git a/README.md b/README.md index 23ebc1b..ba0ee40 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,6 @@ - [x] GPU path tracing (megakernel/wavefront). - [x] GPU volumetric rendering (wavefront). -- [x] Spectral or tristimulus (RGB) rendering. -- [x] Other distributed effects like DoF and motion blur. - [x] Post processing passes (e.g. denoising). - [x] Single/multi-level scene graph with animation support. - [x] Interactive editing scene components with simple UI. @@ -70,6 +68,10 @@ build/src/kiraray.exe common/configs/example_cbox.json **Python binding.** Several simple interfaces are exposed to python scripting via [pybind11](https://github.com/pybind/pybind11), including a OptiX denoiser wrapper for denoising NumPy or PyTorch tensors, see [scripts](common/scripts) for details. +#### Known Build Issues +- In CUDA 12.5 there exist some CUDA-only expressions in thrust headers. If you use CUDA 12.5, you may consider disable the thrust routines in host code (as done in [this commit](https://github.com/cuteday/KiRaRay/commit/c25c2fab44f0ba18cd99b60a4bc757ec0e1ab2a6)) or update to 12.6. +- In CUDA 12.6 there is a compile error in NVTX related code referenced by thrust (`MemoryBarrier` undefined). While I do not know why, I temporarily disabled NVTX as a workaround by defining the `NVTX_DISABLE` macro. + ### Galleries

diff --git a/common/build/source.cmake b/common/build/source.cmake index 636e797..da35a77 100644 --- a/common/build/source.cmake +++ b/common/build/source.cmake @@ -89,6 +89,7 @@ SET_SOURCE_FILES_PROPERTIES ( ${KRR_RENDER_SOURCE_DIR}/render/wavefront/integrator.cpp ${KRR_RENDER_SOURCE_DIR}/render/wavefront/medium.cpp ${KRR_RENDER_SOURCE_DIR}/render/passes/denoise/denoise.cpp + ${KRR_RENDER_SOURCE_DIR}/render/passes/errormeasure/errormeasure.cpp ${KRR_RENDER_SOURCE_DIR}/util/tables.cpp PROPERTIES LANGUAGE CUDA ) diff --git a/src/core/common.h b/src/core/common.h index 137cc5c..8ee5d32 100644 --- a/src/core/common.h +++ b/src/core/common.h @@ -31,6 +31,7 @@ typedef uint32_t uint; typedef unsigned char uchar; #define KRR_COMMON_H +#define NVTX_DISABLE #if !defined(NAMESPACE_BEGIN) #define NAMESPACE_BEGIN(name) namespace name { diff --git a/src/misc/render/ppg/integrator.cpp b/src/misc/render/ppg/integrator.cpp index 38605d2..f6a329a 100644 --- a/src/misc/render/ppg/integrator.cpp +++ b/src/misc/render/ppg/integrator.cpp @@ -55,9 +55,9 @@ void PPGPathTracer::initialize() { // However, SD-Tree has some recursive routines that may exceed that size; CUDA_CHECK(cudaDeviceSetLimit(cudaLimitStackSize, 4 * 1024)); cudaDeviceSynchronize(); - if (guidedPathState) guidedPathState->resize(maxQueueSize, alloc); + if (guidedPathState) guidedPathState->resize(maxQueueSize); else guidedPathState = alloc.new_object(maxQueueSize, alloc); - if (guidedRayQueue) guidedRayQueue->resize(maxQueueSize, alloc); + if (guidedRayQueue) guidedRayQueue->resize(maxQueueSize); else guidedRayQueue = alloc.new_object(maxQueueSize, alloc); /* @addition VAPG */ if (m_image) m_image->resize(getFrameSize()); diff --git a/src/render/passes/errormeasure/metrics.cu b/src/render/passes/errormeasure/metrics.cu index 7a882de..6b2030e 100644 --- a/src/render/passes/errormeasure/metrics.cu +++ b/src/render/passes/errormeasure/metrics.cu @@ -11,7 +11,7 @@ #define METRIC_IN_SRGB 0 #define CLAMP_PIXEL_ERROR 1 -#define DISCARD_FIREFLIES 1 +#define DISCARD_FIREFLIES 0 NAMESPACE_BEGIN(krr) @@ -114,23 +114,17 @@ float calc_metric(const CudaRenderTarget & frame, const RGBA *reference, default: error = rel_mse(y, ref); } +#if CLAMP_PIXEL_ERROR + error = min(error, CLAMP_PIXEL_ERROR_THRESHOLD); +#endif error_buffer[i] = error; }, KRR_DEFAULT_STREAM); #if DISCARD_FIREFLIES - thrust::sort(thrust::device.on(KRR_DEFAULT_STREAM), error_buffer, - error_buffer + n_elements); + thrust::sort(thrust::device.on(KRR_DEFAULT_STREAM), error_buffer, error_buffer + n_elements); n_elements = n_elements * (1.f - DISCARD_FIREFLIES_PRECENTAGE); #endif - - return thrust::transform_reduce(thrust::device, - error_buffer, error_buffer + n_elements, - [] KRR_HOST_DEVICE (const float &val) -> float { -#if CLAMP_PIXEL_ERROR - return min(val, CLAMP_PIXEL_ERROR_THRESHOLD); -#endif - return val; - }, 0.f, thrust::plus()) / n_elements; + return thrust::reduce(thrust::device, error_buffer, error_buffer + n_elements, 0.f, thrust::plus()) / n_elements; } NAMESPACE_END(krr) \ No newline at end of file diff --git a/src/render/wavefront/integrator.cpp b/src/render/wavefront/integrator.cpp index 8a615c4..fc6d61c 100644 --- a/src/render/wavefront/integrator.cpp +++ b/src/render/wavefront/integrator.cpp @@ -25,22 +25,22 @@ void WavefrontPathTracer::initialize() { maxQueueSize = getFrameSize()[0] * getFrameSize()[1]; cudaDeviceSynchronize(); // necessary, preventing kernel accessing memories tobe free'ed... for (int i = 0; i < 2; i++) - if (rayQueue[i]) rayQueue[i]->resize(maxQueueSize, alloc); + if (rayQueue[i]) rayQueue[i]->resize(maxQueueSize); else rayQueue[i] = alloc.new_object(maxQueueSize, alloc); - if (missRayQueue) missRayQueue->resize(maxQueueSize, alloc); + if (missRayQueue) missRayQueue->resize(maxQueueSize); else missRayQueue = alloc.new_object(maxQueueSize, alloc); - if (hitLightRayQueue) hitLightRayQueue->resize(maxQueueSize, alloc); + if (hitLightRayQueue) hitLightRayQueue->resize(maxQueueSize); else hitLightRayQueue = alloc.new_object(maxQueueSize, alloc); - if (shadowRayQueue) shadowRayQueue->resize(maxQueueSize, alloc); + if (shadowRayQueue) shadowRayQueue->resize(maxQueueSize); else shadowRayQueue = alloc.new_object(maxQueueSize, alloc); - if (scatterRayQueue) scatterRayQueue->resize(maxQueueSize, alloc); + if (scatterRayQueue) scatterRayQueue->resize(maxQueueSize); else scatterRayQueue = alloc.new_object(maxQueueSize, alloc); - if (pixelState) pixelState->resize(maxQueueSize, alloc); + if (pixelState) pixelState->resize(maxQueueSize); else pixelState = alloc.new_object(maxQueueSize, alloc); if (enableMedium) { - if (mediumSampleQueue) mediumSampleQueue->resize(maxQueueSize, alloc); + if (mediumSampleQueue) mediumSampleQueue->resize(maxQueueSize); else mediumSampleQueue = alloc.new_object(maxQueueSize, alloc); - if (mediumScatterQueue) mediumScatterQueue->resize(maxQueueSize, alloc); + if (mediumScatterQueue) mediumScatterQueue->resize(maxQueueSize); else mediumScatterQueue = alloc.new_object(maxQueueSize, alloc); } if (!camera) camera = alloc.new_object(); diff --git a/src/util/soac.cpp b/src/util/soac.cpp index f50b8a3..c797cd4 100644 --- a/src/util/soac.cpp +++ b/src/util/soac.cpp @@ -310,7 +310,7 @@ int main(int argc, char* argv[]) { printf("template class SOA<%s> {\npublic:\n", soa.templateType.c_str(), workItemName.c_str()); else - printf("template <> class SOA<%s> {\n public:\n", soa.type.c_str()); + printf("template <> class SOA<%s> {\npublic:\n", soa.type.c_str()); // Iterator [modified] printf(" using value_type = %s;\n", workItemName.c_str()); printf(" using iterator = SOAIterator;\n"); @@ -325,7 +325,7 @@ int main(int argc, char* argv[]) { // Constructor printf(" SOA() = default;\n"); - printf(" SOA(int n, Allocator alloc) : nAlloc(n) {\n"); + printf(" SOA(int n, Allocator alloc) : nAlloc(n), mAlloc(alloc) {\n"); for (const auto& member : soa.members) { for (int i = 0; i < member.names.size(); ++i) { std::string name = member.names[i]; @@ -353,7 +353,31 @@ int main(int argc, char* argv[]) { } } printf(" }\n"); - printf(" void resize(int n, Allocator alloc) { \n"); + // Deconstructor +#if 0 // TODO: Implement deconstructor for SOAs. + printf(" ~SOA() {\n"); + printf(" if (nAlloc == 0) return;\n"); + for (const auto& member : soa.members) { + for (int i = 0; i < member.names.size(); ++i) { + std::string name = member.names[i]; + if (!member.arraySizes[i].empty()) { + if (isFlatType(member.type) || member.numPointers > 0) { + printf(" for (int i = 0; i < %s; ++i)\n", + member.arraySizes[i].c_str()); + printf(" mAlloc.deallocate_object(this->%s[i]);\n", name.c_str(), + member.GetType().c_str()); + } + } + else { + if (isFlatType(member.type) || member.numPointers > 0) + printf(" mAlloc.deallocate_object(this->%s);\n", + name.c_str(), member.GetType().c_str()); + } + } + } + printf(" }\n"); +#endif + printf(" void resize(int n) { \n"); for (const auto& member : soa.members) { for (int i = 0; i < member.names.size(); ++i) { std::string name = member.names[i]; @@ -361,23 +385,23 @@ int main(int argc, char* argv[]) { printf(" for (int i = 0; i < %s; ++i)\n", member.arraySizes[i].c_str()); if (isFlatType(member.type) || member.numPointers > 0) { - printf(" if (nAlloc) alloc.deallocate_object(this->%s[i]);\n", name.c_str()); - printf(" this->%s[i] = alloc.allocate_object<%s>(n);\n", + printf(" if (nAlloc) mAlloc.deallocate_object(this->%s[i]);\n", name.c_str()); + printf(" this->%s[i] = mAlloc.allocate_object<%s>(n);\n", name.c_str(), member.GetType().c_str()); } else { assert(member.isConst == false && member.numPointers == 0); - printf(" this->%s[i].resize(n, alloc);\n", name.c_str()); + printf(" this->%s[i].resize(n);\n", name.c_str()); } } else { if (isFlatType(member.type) || member.numPointers > 0) { - printf(" if (nAlloc) alloc.deallocate_object(this->%s);\n", name.c_str()); - printf(" this->%s = alloc.allocate_object<%s>(n);\n", + printf(" if (nAlloc) mAlloc.deallocate_object(this->%s);\n", name.c_str()); + printf(" this->%s = mAlloc.allocate_object<%s>(n);\n", name.c_str(), member.GetType().c_str()); } else - printf(" this->%s.resize(n, alloc);\n", name.c_str()); + printf(" this->%s.resize(n);\n", name.c_str()); } } } @@ -479,25 +503,26 @@ int main(int argc, char* argv[]) { printf(" }\n\n"); // Member definitions - printf(" int nAlloc{ };\n"); + printf(" int nAlloc{ 0 };\n"); + printf(" Allocator mAlloc{ };\n"); for (const auto& member : soa.members) { for (int i = 0; i < member.names.size(); ++i) { std::string name = member.names[i]; if (!member.arraySizes[i].empty()) { if (isFlatType(member.type) || member.numPointers > 0) - printf(" %s * /*KRR_RESTRICT*/ %s[%s];\n", + printf(" %s * /*KRR_RESTRICT*/ %s[%s] = {nullptr};\n", member.GetType().c_str(), name.c_str(), member.arraySizes[i].c_str()); else - printf(" SOA<%s> %s[%s];\n", member.type.c_str(), name.c_str(), + printf(" SOA<%s> %s[%s] = {};\n", member.type.c_str(), name.c_str(), member.arraySizes[i].c_str()); } else { if (isFlatType(member.type) || member.numPointers > 0) - printf(" %s * KRR_RESTRICT %s;\n", member.GetType().c_str(), + printf(" %s * KRR_RESTRICT %s {nullptr};\n", member.GetType().c_str(), name.c_str()); else - printf(" SOA<%s> %s;\n", member.type.c_str(), name.c_str()); + printf(" SOA<%s> %s {};\n", member.type.c_str(), name.c_str()); } } }