Skip to content

Commit

Permalink
Bug fixes (#63)
Browse files Browse the repository at this point in the history
* make SOAs self-contained
* temporarily fix a hardcoded routine not exec in host code
* revert the temporary fix for cuda 12.5
* temporarily disable NVTX to work around the cuda 12.6
undefined identifier MemoryBarrier issue.
* Update README.md
  • Loading branch information
cuteday committed Nov 17, 2024
1 parent 8022b37 commit 6c38647
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 38 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/auto-merge.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Workflow: enables squash auto-merge on pull requests that target the `hotfix` branch.
# NOTE(review): nesting indentation is not visible in this view; the keys below are
# expected to be nested under their parent keys in the actual file.
name: Auto-merge PRs

# Trigger: pull_request events, filtered to the `hotfix` branch.
on:
pull_request:
branches: [ hotfix ]

# Request write access to pull requests and repository contents.
permissions:
pull-requests: write
contents: write

jobs:
automerge:
runs-on: ubuntu-latest
# Only run when the triggering actor is the `cuteday` account.
if: github.actor == 'cuteday'
steps:
- uses: peter-evans/enable-pull-request-automerge@v3
with:
# The token is read from the DEPENDABOT_TOKEN repository secret.
token: ${{ secrets.DEPENDABOT_TOKEN }}
# Number of the pull request that triggered this run.
pull-request-number: ${{ github.event.pull_request.number }}
merge-method: squash
14 changes: 14 additions & 0 deletions .github/workflows/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Workflow: runs the Dependabot auto-merge action on pull request events.
# NOTE(review): nesting indentation is not visible in this view; the keys below are
# expected to be nested under their parent keys in the actual file.
name: auto-merge

# Trigger: any pull_request event (no branch filter is specified).
on:
pull_request:

jobs:
auto-merge:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ahmadnassri/action-dependabot-auto-merge@v2
with:
# NOTE(review): `target: minor` presumably limits auto-merge to minor-or-lower
# version bumps; confirm against the action's documentation.
target: minor
# The token is read from the DEPENDABOT_TOKEN repository secret.
github-token: ${{ secrets.DEPENDABOT_TOKEN }}
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
- [x] GPU path tracing (megakernel/wavefront).
- [x] GPU volumetric rendering (wavefront).
- [x] Spectral or tristimulus (RGB) rendering.
- [x] Other distributed effects like DoF and motion blur.
- [x] Post processing passes (e.g. denoising).
- [x] Single/multi-level scene graph with animation support.
- [x] Interactive editing scene components with simple UI.
Expand Down Expand Up @@ -70,6 +68,10 @@ build/src/kiraray.exe common/configs/example_cbox.json

**Python binding.** Several simple interfaces are exposed to Python scripting via [pybind11](https://github.com/pybind/pybind11), including an OptiX denoiser wrapper for denoising NumPy or PyTorch tensors; see [scripts](common/scripts) for details.

#### Known Build Issues
- In CUDA 12.5 there exist some CUDA-only expressions in thrust headers. If you use CUDA 12.5, you may consider disabling the thrust routines in host code (as done in [this commit](https://github.com/cuteday/KiRaRay/commit/c25c2fab44f0ba18cd99b60a4bc757ec0e1ab2a6)) or updating to 12.6.
- In CUDA 12.6 there is a compile error in NVTX-related code referenced by thrust (`MemoryBarrier` undefined). While I do not know the cause, I have temporarily disabled NVTX as a workaround by defining the `NVTX_DISABLE` macro.

### Galleries

<p align=center>
Expand Down
1 change: 1 addition & 0 deletions common/build/source.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ SET_SOURCE_FILES_PROPERTIES (
${KRR_RENDER_SOURCE_DIR}/render/wavefront/integrator.cpp
${KRR_RENDER_SOURCE_DIR}/render/wavefront/medium.cpp
${KRR_RENDER_SOURCE_DIR}/render/passes/denoise/denoise.cpp
${KRR_RENDER_SOURCE_DIR}/render/passes/errormeasure/errormeasure.cpp
${KRR_RENDER_SOURCE_DIR}/util/tables.cpp
PROPERTIES LANGUAGE CUDA
)
Expand Down
1 change: 1 addition & 0 deletions src/core/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ typedef uint32_t uint;
typedef unsigned char uchar;

#define KRR_COMMON_H
#define NVTX_DISABLE

#if !defined(NAMESPACE_BEGIN)
#define NAMESPACE_BEGIN(name) namespace name {
Expand Down
4 changes: 2 additions & 2 deletions src/misc/render/ppg/integrator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ void PPGPathTracer::initialize() {
// However, SD-Tree has some recursive routines that may exceed that size;
CUDA_CHECK(cudaDeviceSetLimit(cudaLimitStackSize, 4 * 1024));
cudaDeviceSynchronize();
if (guidedPathState) guidedPathState->resize(maxQueueSize, alloc);
if (guidedPathState) guidedPathState->resize(maxQueueSize);
else guidedPathState = alloc.new_object<GuidedPathStateBuffer>(maxQueueSize, alloc);
if (guidedRayQueue) guidedRayQueue->resize(maxQueueSize, alloc);
if (guidedRayQueue) guidedRayQueue->resize(maxQueueSize);
else guidedRayQueue = alloc.new_object<GuidedRayQueue>(maxQueueSize, alloc);
/* @addition VAPG */
if (m_image) m_image->resize(getFrameSize());
Expand Down
18 changes: 6 additions & 12 deletions src/render/passes/errormeasure/metrics.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#define METRIC_IN_SRGB 0
#define CLAMP_PIXEL_ERROR 1
#define DISCARD_FIREFLIES 1
#define DISCARD_FIREFLIES 0


NAMESPACE_BEGIN(krr)
Expand Down Expand Up @@ -114,23 +114,17 @@ float calc_metric(const CudaRenderTarget & frame, const RGBA *reference,
default:
error = rel_mse(y, ref);
}
#if CLAMP_PIXEL_ERROR
error = min(error, CLAMP_PIXEL_ERROR_THRESHOLD);
#endif
error_buffer[i] = error;
}, KRR_DEFAULT_STREAM);

#if DISCARD_FIREFLIES
thrust::sort(thrust::device.on(KRR_DEFAULT_STREAM), error_buffer,
error_buffer + n_elements);
thrust::sort(thrust::device.on(KRR_DEFAULT_STREAM), error_buffer, error_buffer + n_elements);
n_elements = n_elements * (1.f - DISCARD_FIREFLIES_PRECENTAGE);
#endif

return thrust::transform_reduce(thrust::device,
error_buffer, error_buffer + n_elements,
[] KRR_HOST_DEVICE (const float &val) -> float {
#if CLAMP_PIXEL_ERROR
return min(val, CLAMP_PIXEL_ERROR_THRESHOLD);
#endif
return val;
}, 0.f, thrust::plus<float>()) / n_elements;
return thrust::reduce(thrust::device, error_buffer, error_buffer + n_elements, 0.f, thrust::plus<float>()) / n_elements;
}

NAMESPACE_END(krr)
16 changes: 8 additions & 8 deletions src/render/wavefront/integrator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,22 @@ void WavefrontPathTracer::initialize() {
maxQueueSize = getFrameSize()[0] * getFrameSize()[1];
cudaDeviceSynchronize(); // necessary, preventing kernel accessing memories tobe free'ed...
for (int i = 0; i < 2; i++)
if (rayQueue[i]) rayQueue[i]->resize(maxQueueSize, alloc);
if (rayQueue[i]) rayQueue[i]->resize(maxQueueSize);
else rayQueue[i] = alloc.new_object<RayQueue>(maxQueueSize, alloc);
if (missRayQueue) missRayQueue->resize(maxQueueSize, alloc);
if (missRayQueue) missRayQueue->resize(maxQueueSize);
else missRayQueue = alloc.new_object<MissRayQueue>(maxQueueSize, alloc);
if (hitLightRayQueue) hitLightRayQueue->resize(maxQueueSize, alloc);
if (hitLightRayQueue) hitLightRayQueue->resize(maxQueueSize);
else hitLightRayQueue = alloc.new_object<HitLightRayQueue>(maxQueueSize, alloc);
if (shadowRayQueue) shadowRayQueue->resize(maxQueueSize, alloc);
if (shadowRayQueue) shadowRayQueue->resize(maxQueueSize);
else shadowRayQueue = alloc.new_object<ShadowRayQueue>(maxQueueSize, alloc);
if (scatterRayQueue) scatterRayQueue->resize(maxQueueSize, alloc);
if (scatterRayQueue) scatterRayQueue->resize(maxQueueSize);
else scatterRayQueue = alloc.new_object<ScatterRayQueue>(maxQueueSize, alloc);
if (pixelState) pixelState->resize(maxQueueSize, alloc);
if (pixelState) pixelState->resize(maxQueueSize);
else pixelState = alloc.new_object<PixelStateBuffer>(maxQueueSize, alloc);
if (enableMedium) {
if (mediumSampleQueue) mediumSampleQueue->resize(maxQueueSize, alloc);
if (mediumSampleQueue) mediumSampleQueue->resize(maxQueueSize);
else mediumSampleQueue = alloc.new_object<MediumSampleQueue>(maxQueueSize, alloc);
if (mediumScatterQueue) mediumScatterQueue->resize(maxQueueSize, alloc);
if (mediumScatterQueue) mediumScatterQueue->resize(maxQueueSize);
else mediumScatterQueue = alloc.new_object<MediumScatterQueue>(maxQueueSize, alloc);
}
if (!camera) camera = alloc.new_object<rt::CameraData>();
Expand Down
53 changes: 39 additions & 14 deletions src/util/soac.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ int main(int argc, char* argv[]) {
printf("template <typename %s> class SOA<%s> {\npublic:\n",
soa.templateType.c_str(), workItemName.c_str());
else
printf("template <> class SOA<%s> {\n public:\n", soa.type.c_str());
printf("template <> class SOA<%s> {\npublic:\n", soa.type.c_str());
// Iterator [modified]
printf(" using value_type = %s;\n", workItemName.c_str());
printf(" using iterator = SOAIterator<value_type>;\n");
Expand All @@ -325,7 +325,7 @@ int main(int argc, char* argv[]) {

// Constructor
printf(" SOA() = default;\n");
printf(" SOA(int n, Allocator alloc) : nAlloc(n) {\n");
printf(" SOA(int n, Allocator alloc) : nAlloc(n), mAlloc(alloc) {\n");
for (const auto& member : soa.members) {
for (int i = 0; i < member.names.size(); ++i) {
std::string name = member.names[i];
Expand Down Expand Up @@ -353,31 +353,55 @@ int main(int argc, char* argv[]) {
}
}
printf(" }\n");
printf(" void resize(int n, Allocator alloc) { \n");
// Deconstructor
#if 0 // TODO: Implement deconstructor for SOAs.
printf(" ~SOA() {\n");
printf(" if (nAlloc == 0) return;\n");
for (const auto& member : soa.members) {
for (int i = 0; i < member.names.size(); ++i) {
std::string name = member.names[i];
if (!member.arraySizes[i].empty()) {
if (isFlatType(member.type) || member.numPointers > 0) {
printf(" for (int i = 0; i < %s; ++i)\n",
member.arraySizes[i].c_str());
printf(" mAlloc.deallocate_object(this->%s[i]);\n", name.c_str(),
member.GetType().c_str());
}
}
else {
if (isFlatType(member.type) || member.numPointers > 0)
printf(" mAlloc.deallocate_object(this->%s);\n",
name.c_str(), member.GetType().c_str());
}
}
}
printf(" }\n");
#endif
printf(" void resize(int n) { \n");
for (const auto& member : soa.members) {
for (int i = 0; i < member.names.size(); ++i) {
std::string name = member.names[i];
if (!member.arraySizes[i].empty()) {
printf(" for (int i = 0; i < %s; ++i)\n",
member.arraySizes[i].c_str());
if (isFlatType(member.type) || member.numPointers > 0) {
printf(" if (nAlloc) alloc.deallocate_object(this->%s[i]);\n", name.c_str());
printf(" this->%s[i] = alloc.allocate_object<%s>(n);\n",
printf(" if (nAlloc) mAlloc.deallocate_object(this->%s[i]);\n", name.c_str());
printf(" this->%s[i] = mAlloc.allocate_object<%s>(n);\n",
name.c_str(), member.GetType().c_str());
}
else {
assert(member.isConst == false && member.numPointers == 0);
printf(" this->%s[i].resize(n, alloc);\n", name.c_str());
printf(" this->%s[i].resize(n);\n", name.c_str());
}
}
else {
if (isFlatType(member.type) || member.numPointers > 0) {
printf(" if (nAlloc) alloc.deallocate_object(this->%s);\n", name.c_str());
printf(" this->%s = alloc.allocate_object<%s>(n);\n",
printf(" if (nAlloc) mAlloc.deallocate_object(this->%s);\n", name.c_str());
printf(" this->%s = mAlloc.allocate_object<%s>(n);\n",
name.c_str(), member.GetType().c_str());
}
else
printf(" this->%s.resize(n, alloc);\n", name.c_str());
printf(" this->%s.resize(n);\n", name.c_str());
}
}
}
Expand Down Expand Up @@ -479,25 +503,26 @@ int main(int argc, char* argv[]) {
printf(" }\n\n");

// Member definitions
printf(" int nAlloc{ };\n");
printf(" int nAlloc{ 0 };\n");
printf(" Allocator mAlloc{ };\n");
for (const auto& member : soa.members) {
for (int i = 0; i < member.names.size(); ++i) {
std::string name = member.names[i];
if (!member.arraySizes[i].empty()) {
if (isFlatType(member.type) || member.numPointers > 0)
printf(" %s * /*KRR_RESTRICT*/ %s[%s];\n",
printf(" %s * /*KRR_RESTRICT*/ %s[%s] = {nullptr};\n",
member.GetType().c_str(), name.c_str(),
member.arraySizes[i].c_str());
else
printf(" SOA<%s> %s[%s];\n", member.type.c_str(), name.c_str(),
printf(" SOA<%s> %s[%s] = {};\n", member.type.c_str(), name.c_str(),
member.arraySizes[i].c_str());
}
else {
if (isFlatType(member.type) || member.numPointers > 0)
printf(" %s * KRR_RESTRICT %s;\n", member.GetType().c_str(),
printf(" %s * KRR_RESTRICT %s {nullptr};\n", member.GetType().c_str(),
name.c_str());
else
printf(" SOA<%s> %s;\n", member.type.c_str(), name.c_str());
printf(" SOA<%s> %s {};\n", member.type.c_str(), name.c_str());
}
}
}
Expand Down

0 comments on commit 6c38647

Please sign in to comment.