diff --git a/cuda_rasterizer/auxiliary.h b/cuda_rasterizer/auxiliary.h index 4d4b9b78..d2507271 100644 --- a/cuda_rasterizer/auxiliary.h +++ b/cuda_rasterizer/auxiliary.h @@ -96,9 +96,13 @@ __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, cons return transformed; } +#define SAFE_EPSILON 1e-8f + __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) { float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; + if (sum2 < SAFE_EPSILON) // Prevent division by zero + return 0.0f; float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; return dnormvdz; @@ -107,6 +111,8 @@ __forceinline__ __device__ float dnormvdz(float3 v, float3 dv) __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) { float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; + if (sum2 < SAFE_EPSILON) // Prevent division by zero + return {0.0f, 0.0f, 0.0f}; float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); float3 dnormvdv; @@ -119,6 +125,8 @@ __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) { float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; + if (sum2 < SAFE_EPSILON) // Prevent division by zero + return {0.0f, 0.0f, 0.0f, 0.0f}; float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu index 4aa41e1c..14175b68 100644 --- a/cuda_rasterizer/backward.cu +++ b/cuda_rasterizer/backward.cu @@ -22,7 +22,10 @@ __device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm:: // Compute intermediate values, as it is done during forward glm::vec3 pos = means[idx]; glm::vec3 dir_orig = pos - campos; - glm::vec3 dir = dir_orig / glm::length(dir_orig); + float length_dir = glm::length(dir_orig); + + // Handle potential division by zero if pos == campos + glm::vec3 dir = (length_dir < SAFE_EPSILON) ? glm::vec3(0.0f, 0.0f, 0.0f) : dir_orig / length_dir; glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs; @@ -130,7 +133,8 @@ __device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm:: glm::vec3 dL_ddir(glm::dot(dRGBdx, dL_dRGB), glm::dot(dRGBdy, dL_dRGB), glm::dot(dRGBdz, dL_dRGB)); // Account for normalization of direction - float3 dL_dmean = dnormvdv(float3{ dir_orig.x, dir_orig.y, dir_orig.z }, float3{ dL_ddir.x, dL_ddir.y, dL_ddir.z }); + // dnormvdv handles the zero-length case internally now + float3 dL_dmean = dnormvdv({dir_orig.x, dir_orig.y, dir_orig.z}, {dL_ddir.x, dL_ddir.y, dL_ddir.z}); // Gradients of loss w.r.t. Gaussian means, but only the portion // that is caused because the mean affects the view-dependent color. @@ -159,11 +163,15 @@ __global__ void computeCov2DCUDA(int P, // Reading location of 3D covariance for this Gaussian const float* cov3D = cov3Ds + 6 * idx; + // Fetch gradients, recompute 2D covariance and relevant // intermediate forward results needed in the backward. + float3 mean = means[idx]; float3 dL_dconic = { dL_dconics[4 * idx], dL_dconics[4 * idx + 1], dL_dconics[4 * idx + 3] }; float3 t = transformPoint4x3(mean, view_matrix); + + bool safe_tz = abs(t.z) >= SAFE_EPSILON; const float limx = 1.3f * tan_fovx; const float limy = 1.3f * tan_fovy; @@ -175,8 +183,8 @@ __global__ void computeCov2DCUDA(int P, const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1; const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1; - glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z), - 0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z), + glm::mat3 J = glm::mat3(safe_tz ? h_x / t.z : 0.0f, 0.0f, safe_tz ? -(h_x * t.x) / (t.z * t.z) : 0.0f, + 0.0f, safe_tz ? h_y / t.z : 0.0f, safe_tz ? -(h_y * t.y) / (t.z * t.z) : 0.0f, 0, 0, 0); glm::mat3 W = glm::mat3( @@ -200,10 +208,12 @@ __global__ void computeCov2DCUDA(int P, float denom = a * c - b * b; float dL_da = 0, dL_db = 0, dL_dc = 0; - float denom2inv = 1.0f / ((denom * denom) + 0.0000001f); - if (denom2inv != 0) + float denom2 = denom * denom; + + if (abs(denom) > SAFE_EPSILON && safe_tz) { + float denom2inv = 1.0f / (denom2 + SAFE_EPSILON); // Gradients of loss w.r.t. entries of 2D covariance matrix, // given gradients of loss w.r.t. conic matrix (inverse covariance matrix). // e.g., dL / da = dL / d_conic_a * d_conic_a / d_a @@ -254,7 +264,7 @@ __global__ void computeCov2DCUDA(int P, float dL_dJ11 = W[1][0] * dL_dT10 + W[1][1] * dL_dT11 + W[1][2] * dL_dT12; float dL_dJ12 = W[2][0] * dL_dT10 + W[2][1] * dL_dT11 + W[2][2] * dL_dT12; - float tz = 1.f / t.z; + float tz = safe_tz ? 1.f / t.z : 0.0f; float tz2 = tz * tz; float tz3 = tz2 * tz; @@ -337,7 +347,8 @@ __device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const gl // Gradients of loss w.r.t. unnormalized quaternion float4* dL_drot = (float4*)(dL_drots + idx); - *dL_drot = float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w };//dnormvdv(float4{ rot.x, rot.y, rot.z, rot.w }, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w }); + float4 rot4 = {rot.x, rot.y, rot.z, rot.w}; + *dL_drot = dnormvdv(rot4, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w }); //NOW FIXED: dnormvdv(float4{ rot.x, rot.y, rot.z, rot.w }, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w }); } // Backward pass of the preprocessing steps, except diff --git a/cuda_rasterizer/forward.cu b/cuda_rasterizer/forward.cu index c419a328..d5963afe 100644 --- a/cuda_rasterizer/forward.cu +++ b/cuda_rasterizer/forward.cu @@ -24,7 +24,9 @@ __device__ glm::vec3 computeColorFromSH(int idx, int deg, int max_coeffs, const // Efficient View Synthesis" by Zhang et al. (2022) glm::vec3 pos = means[idx]; glm::vec3 dir = pos - campos; - dir = dir / glm::length(dir); + float length_dir = glm::length(dir); + if (length_dir < SAFE_EPSILON) dir = glm::vec3(0.0f, 0.0f, 0.0f); + else dir = dir / length_dir; glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs; glm::vec3 result = SH_C0 * sh[0]; @@ -78,6 +80,8 @@ __device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y, // Additionally considers aspect / scaling of viewport. // Transposes used to account for row-/column-major conventions. float3 t = transformPoint4x3(mean, viewmatrix); + if (abs(t.z) < SAFE_EPSILON) + return {0.0f, 0.0f, 0.0f}; const float limx = 1.3f * tan_fovx; const float limy = 1.3f * tan_fovy; @@ -217,7 +221,7 @@ __global__ void preprocessCUDA(int P, int D, int M, // Invert covariance (EWA algorithm) float det = (cov.x * cov.z - cov.y * cov.y); - if (det == 0.0f) + if (abs(det) < SAFE_EPSILON) return; float det_inv = 1.f / det; float3 conic = { cov.z * det_inv, -cov.y * det_inv, cov.x * det_inv };