diff --git a/diff-gaussian-rasterization/cuda_rasterizer/forward.cu b/diff-gaussian-rasterization/cuda_rasterizer/forward.cu index 01cc602..66c3aa8 100644 --- a/diff-gaussian-rasterization/cuda_rasterizer/forward.cu +++ b/diff-gaussian-rasterization/cuda_rasterizer/forward.cu @@ -341,6 +341,7 @@ __device__ void computeCov3D_conditional(const glm::vec3 scale, const float scal template __global__ void preprocessCUDA(int P, int D, int D_t, int M, const float* orig_points, + float* out_means3D, const float* ts, const glm::vec3* scales, const float* scales_t, @@ -403,6 +404,9 @@ __global__ void preprocessCUDA(int P, int D, int D_t, int M, rotations[idx], rotations_r[idx], cov3Ds + idx * 6, p_orig, ts[idx], timestamp, idx, time_mask, opacity); if (!time_mask) return; cov3D = cov3Ds + idx * 6; + out_means3D[idx*3+0]=p_orig.x; + out_means3D[idx*3+1]=p_orig.y; + out_means3D[idx*3+2]=p_orig.z; } else { @@ -641,6 +645,7 @@ void FORWARD::render( void FORWARD::preprocess(int P, int D, int D_t, int M, const float* means3D, + float* out_means3D, const float* ts, const glm::vec3* scales, const float* scales_t, @@ -674,6 +679,7 @@ void FORWARD::preprocess(int P, int D, int D_t, int M, preprocessCUDA << <(P + 255) / 256, 256 >> > ( P, D, D_t, M, means3D, + out_means3D, ts, scales, scales_t, diff --git a/diff-gaussian-rasterization/cuda_rasterizer/forward.h b/diff-gaussian-rasterization/cuda_rasterizer/forward.h index 43bf63d..0e2f345 100644 --- a/diff-gaussian-rasterization/cuda_rasterizer/forward.h +++ b/diff-gaussian-rasterization/cuda_rasterizer/forward.h @@ -23,6 +23,7 @@ namespace FORWARD // Perform initial steps for each Gaussian prior to rasterization. void preprocess(int P, int D, int D_t, int M, const float* orig_points, + float* out_means3D, const float* ts, const glm::vec3* scales, const float* scales_t, diff --git a/diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h b/diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h index 7d94285..4588a58 100644 --- a/diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h +++ b/diff-gaussian-rasterization/cuda_rasterizer/rasterizer.h @@ -36,6 +36,7 @@ namespace CudaRasterizer const float* background, const int width, int height, const float* means3D, + float* out_means3D, const float* shs, const float* colors_precomp, const float* flows_precomp, @@ -66,7 +67,7 @@ namespace CudaRasterizer const int P, int D, int D_t, int M, int R, const float* background, const int width, int height, - const float* means3D, + const float* out_means3D, const float* shs, const float* colors_precomp, const float* flows_2d, diff --git a/diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu b/diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu index bf63b1f..b4b038b 100644 --- a/diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu +++ b/diff-gaussian-rasterization/cuda_rasterizer/rasterizer_impl.cu @@ -204,6 +204,7 @@ int CudaRasterizer::Rasterizer::forward( const float* background, const int width, int height, const float* means3D, + float* out_means3D, const float* shs, const float* colors_precomp, const float* flows_precomp, @@ -259,6 +260,7 @@ int CudaRasterizer::Rasterizer::forward( CHECK_CUDA(FORWARD::preprocess( P, D, D_t, M, means3D, + out_means3D, ts, (glm::vec3*)scales, scales_t, @@ -365,7 +367,7 @@ void CudaRasterizer::Rasterizer::backward( const int P, int D, int D_t, int M, int R, const float* background, const int width, int height, - const float* means3D, + const float* out_means3D, const float* shs, const float* colors_precomp, const float* flows_2d, @@ -455,7 +457,7 @@ void CudaRasterizer::Rasterizer::backward( // use the one we computed ourselves. const float* cov3D_ptr = (cov3D_precomp != nullptr) ? cov3D_precomp : geomState.cov3D; CHECK_CUDA(BACKWARD::preprocess(P, D, D_t, M, - (float3*)means3D, + (float3*)out_means3D, radii, shs, ts, diff --git a/diff-gaussian-rasterization/rasterize_points.cu b/diff-gaussian-rasterization/rasterize_points.cu index 1d93420..2d1ece4 100644 --- a/diff-gaussian-rasterization/rasterize_points.cu +++ b/diff-gaussian-rasterization/rasterize_points.cu @@ -33,7 +33,7 @@ std::function resizeFunctional(torch::Tensor& t) { return lambda; } -std::tuple +std::tuple RasterizeGaussiansCUDA( const torch::Tensor& background, const torch::Tensor& means3D, @@ -81,6 +81,7 @@ RasterizeGaussiansCUDA( torch::Tensor out_depth = torch::full({1, H, W}, 0.0, float_opts); torch::Tensor out_T = torch::full({1, H, W}, 0.0, float_opts); torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32)); + torch::Tensor out_means3D = means3D.clone(); torch::Device device(torch::kCUDA); torch::TensorOptions options(torch::kByte); @@ -108,6 +109,7 @@ RasterizeGaussiansCUDA( background.contiguous().data(), W, H, means3D.contiguous().data(), + out_means3D.contiguous().data(), sh.contiguous().data_ptr(), colors.contiguous().data(), flows.contiguous().data(), @@ -141,13 +143,14 @@ RasterizeGaussiansCUDA( CudaRasterizer::GeometryState geoState = CudaRasterizer::GeometryState::fromChunk(geo_ptr, P); torch::Tensor covs3D_com = torch::from_blob(geoState.cov3D, {P, 6}, float_opts); - return std::make_tuple(rendered, out_color, out_flow, out_depth, out_T, radii, geomBuffer, binningBuffer, imgBuffer, covs3D_com); + return std::make_tuple(rendered, out_color, out_flow, out_depth, out_T, radii, geomBuffer, binningBuffer, imgBuffer, covs3D_com, out_means3D); } std::tuple RasterizeGaussiansBackwardCUDA( const torch::Tensor& background, const torch::Tensor& means3D, + const torch::Tensor& out_means3D, const torch::Tensor& radii, const torch::Tensor& colors, const torch::Tensor& flows_2d, @@ -211,7 +214,8 @@ std::tuple(), W, H, - means3D.contiguous().data(), + // means3D.contiguous().data(), + out_means3D.contiguous().data(), sh.contiguous().data(), colors.contiguous().data(), flows_2d.contiguous().data(), diff --git a/diff-gaussian-rasterization/rasterize_points.h b/diff-gaussian-rasterization/rasterize_points.h index 4284285..ad17582 100644 --- a/diff-gaussian-rasterization/rasterize_points.h +++ b/diff-gaussian-rasterization/rasterize_points.h @@ -15,7 +15,7 @@ #include #include -std::tuple +std::tuple RasterizeGaussiansCUDA( const torch::Tensor& background, const torch::Tensor& means3D, @@ -51,6 +51,7 @@ std::tuple