Skip to content

Fix Potential Division-by-Zero Issues #88

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cuda_rasterizer/auxiliary.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,13 @@ __forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, cons
return transformed;
}

#define SAFE_EPSILON 1e-8f

__forceinline__ __device__ float dnormvdz(float3 v, float3 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
if (sum2 < SAFE_EPSILON) // Prevent division by zero
return 0.0f;
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);
float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32;
return dnormvdz;
Expand All @@ -107,6 +111,8 @@ __forceinline__ __device__ float dnormvdz(float3 v, float3 dv)
__forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z;
if (sum2 < SAFE_EPSILON) // Prevent division by zero
return {0.0f, 0.0f, 0.0f};
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);

float3 dnormvdv;
Expand All @@ -119,6 +125,8 @@ __forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv)
__forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv)
{
float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
if (sum2 < SAFE_EPSILON) // Prevent division by zero
return {0.0f, 0.0f, 0.0f, 0.0f};
float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2);

float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w };
Expand Down
27 changes: 19 additions & 8 deletions cuda_rasterizer/backward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ __device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm::
// Compute intermediate values, as it is done during forward
glm::vec3 pos = means[idx];
glm::vec3 dir_orig = pos - campos;
glm::vec3 dir = dir_orig / glm::length(dir_orig);
float length_dir = glm::length(dir_orig);

// Handle potential division by zero if pos == campos
glm::vec3 dir = (length_dir < SAFE_EPSILON) ? glm::vec3(0.0f, 0.0f, 0.0f) : dir_orig / length_dir;

glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs;

Expand Down Expand Up @@ -130,7 +133,8 @@ __device__ void computeColorFromSH(int idx, int deg, int max_coeffs, const glm::
glm::vec3 dL_ddir(glm::dot(dRGBdx, dL_dRGB), glm::dot(dRGBdy, dL_dRGB), glm::dot(dRGBdz, dL_dRGB));

// Account for normalization of direction
float3 dL_dmean = dnormvdv(float3{ dir_orig.x, dir_orig.y, dir_orig.z }, float3{ dL_ddir.x, dL_ddir.y, dL_ddir.z });
// dnormvdv handles the zero-length case internally now
float3 dL_dmean = dnormvdv({dir_orig.x, dir_orig.y, dir_orig.z}, {dL_ddir.x, dL_ddir.y, dL_ddir.z});

// Gradients of loss w.r.t. Gaussian means, but only the portion
// that is caused because the mean affects the view-dependent color.
Expand Down Expand Up @@ -159,11 +163,15 @@ __global__ void computeCov2DCUDA(int P,
// Reading location of 3D covariance for this Gaussian
const float* cov3D = cov3Ds + 6 * idx;


// Fetch gradients, recompute 2D covariance and relevant
// intermediate forward results needed in the backward.

float3 mean = means[idx];
float3 dL_dconic = { dL_dconics[4 * idx], dL_dconics[4 * idx + 1], dL_dconics[4 * idx + 3] };
float3 t = transformPoint4x3(mean, view_matrix);

bool safe_tz = abs(t.z) >= SAFE_EPSILON;

const float limx = 1.3f * tan_fovx;
const float limy = 1.3f * tan_fovy;
Expand All @@ -175,8 +183,8 @@ __global__ void computeCov2DCUDA(int P,
const float x_grad_mul = txtz < -limx || txtz > limx ? 0 : 1;
const float y_grad_mul = tytz < -limy || tytz > limy ? 0 : 1;

glm::mat3 J = glm::mat3(h_x / t.z, 0.0f, -(h_x * t.x) / (t.z * t.z),
0.0f, h_y / t.z, -(h_y * t.y) / (t.z * t.z),
glm::mat3 J = glm::mat3(safe_tz ? h_x / t.z : 0.0f, 0.0f, safe_tz ? -(h_x * t.x) / (t.z * t.z) : 0.0f,
0.0f, safe_tz ? h_y / t.z : 0.0f, safe_tz ? -(h_y * t.y) / (t.z * t.z) : 0.0f,
0, 0, 0);

glm::mat3 W = glm::mat3(
Expand All @@ -200,10 +208,12 @@ __global__ void computeCov2DCUDA(int P,

float denom = a * c - b * b;
float dL_da = 0, dL_db = 0, dL_dc = 0;
float denom2inv = 1.0f / ((denom * denom) + 0.0000001f);

if (denom2inv != 0)
float denom2 = denom * denom;

if (abs(denom) > SAFE_EPSILON && safe_tz)
{
float denom2inv = 1.0f / (denom2 + SAFE_EPSILON);
// Gradients of loss w.r.t. entries of 2D covariance matrix,
// given gradients of loss w.r.t. conic matrix (inverse covariance matrix).
// e.g., dL / da = dL / d_conic_a * d_conic_a / d_a
Expand Down Expand Up @@ -254,7 +264,7 @@ __global__ void computeCov2DCUDA(int P,
float dL_dJ11 = W[1][0] * dL_dT10 + W[1][1] * dL_dT11 + W[1][2] * dL_dT12;
float dL_dJ12 = W[2][0] * dL_dT10 + W[2][1] * dL_dT11 + W[2][2] * dL_dT12;

float tz = 1.f / t.z;
float tz = safe_tz ? 1.f / t.z : 0.0f;
float tz2 = tz * tz;
float tz3 = tz2 * tz;

Expand Down Expand Up @@ -337,7 +347,8 @@ __device__ void computeCov3D(int idx, const glm::vec3 scale, float mod, const gl

// Gradients of loss w.r.t. unnormalized quaternion
float4* dL_drot = (float4*)(dL_drots + idx);
*dL_drot = float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w };//dnormvdv(float4{ rot.x, rot.y, rot.z, rot.w }, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w });
float4 rot4 = {rot.x, rot.y, rot.z, rot.w};
*dL_drot = dnormvdv(rot4, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w }); //NOW FIXED: dnormvdv(float4{ rot.x, rot.y, rot.z, rot.w }, float4{ dL_dq.x, dL_dq.y, dL_dq.z, dL_dq.w });
}

// Backward pass of the preprocessing steps, except
Expand Down
8 changes: 6 additions & 2 deletions cuda_rasterizer/forward.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ __device__ glm::vec3 computeColorFromSH(int idx, int deg, int max_coeffs, const
// Efficient View Synthesis" by Zhang et al. (2022)
glm::vec3 pos = means[idx];
glm::vec3 dir = pos - campos;
dir = dir / glm::length(dir);
float length_dir = glm::length(dir);
if (length_dir < SAFE_EPSILON) dir = glm::vec3(0.0f, 0.0f, 0.0f);
else dir = dir / length_dir;

glm::vec3* sh = ((glm::vec3*)shs) + idx * max_coeffs;
glm::vec3 result = SH_C0 * sh[0];
Expand Down Expand Up @@ -78,6 +80,8 @@ __device__ float3 computeCov2D(const float3& mean, float focal_x, float focal_y,
// Additionally considers aspect / scaling of viewport.
// Transposes used to account for row-/column-major conventions.
float3 t = transformPoint4x3(mean, viewmatrix);
if (abs(t.z) < SAFE_EPSILON)
return {0.0f, 0.0f, 0.0f};

const float limx = 1.3f * tan_fovx;
const float limy = 1.3f * tan_fovy;
Expand Down Expand Up @@ -217,7 +221,7 @@ __global__ void preprocessCUDA(int P, int D, int M,

// Invert covariance (EWA algorithm)
float det = (cov.x * cov.z - cov.y * cov.y);
if (det == 0.0f)
if (abs(det) < SAFE_EPSILON)
return;
float det_inv = 1.f / det;
float3 conic = { cov.z * det_inv, -cov.y * det_inv, cov.x * det_inv };
Expand Down