toggle antialiasing

alanvinx · alanvinx · commit 9c5c2028f6fb · 2024-09-06T16:31:26.000+02:00
diff --git a/cuda_rasterizer/auxiliary.h b/cuda_rasterizer/auxiliary.h
@@ -17,7 +17,6 @@
 
 #define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
 #define NUM_WARPS (BLOCK_SIZE/32)
-#define DGR_FIX_AA
 // Spherical harmonics coefficients
 __device__ const float SH_C0 = 0.28209479177387814f;
 __device__ const float SH_C1 = 0.4886025119029199f;
diff --git a/cuda_rasterizer/backward.cu b/cuda_rasterizer/backward.cu
@@ -156,7 +156,8 @@ __global__ void computeCov2DCUDA(int P,
 	float* dL_dopacity,
 	const float* dL_dinvdepth,
 	float3* dL_dmeans,
-	float* dL_dcov)
+	float* dL_dcov,
+	bool antialiasing)
 {
 	auto idx = cg::this_grid().thread_rank();
 	if (idx >= P || !(radii[idx] > 0))
@@ -205,41 +206,44 @@ __global__ void computeCov2DCUDA(int P,
 	float c_yy = cov2D[1][1];
 	
 	constexpr float h_var = 0.3f;
-#ifdef DGR_FIX_AA
-	 const float det_cov = c_xx * c_yy - c_xy * c_xy;
-	c_xx += h_var;
-	c_yy += h_var;
-	const float det_cov_plus_h_cov = c_xx * c_yy - c_xy * c_xy;
-	const float h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
-	const float dL_dopacity_v = dL_dopacity[idx];
-	const float d_h_convolution_scaling = dL_dopacity_v * opacities[idx];
-	dL_dopacity[idx] = dL_dopacity_v * h_convolution_scaling;
-	const float d_inside_root = (det_cov / det_cov_plus_h_cov) <= 0.000025f ? 0.f : d_h_convolution_scaling / (2 * h_convolution_scaling);
-#else
-	c_xx += h_var;
-	c_yy += h_var;
-#endif
+	float d_inside_root = 0.f;
+	if(antialiasing)
+	{
+		const float det_cov = c_xx * c_yy - c_xy * c_xy;
+		c_xx += h_var;
+		c_yy += h_var;
+		const float det_cov_plus_h_cov = c_xx * c_yy - c_xy * c_xy;
+		const float h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
+		const float dL_dopacity_v = dL_dopacity[idx];
+		const float d_h_convolution_scaling = dL_dopacity_v * opacities[idx];
+		dL_dopacity[idx] = dL_dopacity_v * h_convolution_scaling;
+		d_inside_root = (det_cov / det_cov_plus_h_cov) <= 0.000025f ? 0.f : d_h_convolution_scaling / (2 * h_convolution_scaling);
+	} 
+	else
+	{
+		c_xx += h_var;
+		c_yy += h_var;
+	}
 	
 	float dL_dc_xx = 0;
 	float dL_dc_xy = 0;
 	float dL_dc_yy = 0;
-#ifdef DGR_FIX_AA
+	if(antialiasing)
 	{
-	               // https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdx
-	               // https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdz
-	const float x = c_xx;
-	const float y = c_yy;
-	const float z = c_xy;
-	const float w = h_var;
-	const float denom_f = d_inside_root / sq(w * w + w * (x + y) + x * y - z * z);
-	const float dL_dx = w * (w * y + y * y + z * z) * denom_f;
-	const float dL_dy = w * (w * x + x * x + z * z) * denom_f;
-	const float dL_dz = -2.f * w * z * (w + x + y) * denom_f;
-	dL_dc_xx = dL_dx;
-	dL_dc_yy = dL_dy;
-	dL_dc_xy = dL_dz;
+		// https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdx
+		// https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdz
+		const float x = c_xx;
+		const float y = c_yy;
+		const float z = c_xy;
+		const float w = h_var;
+		const float denom_f = d_inside_root / sq(w * w + w * (x + y) + x * y - z * z);
+		const float dL_dx = w * (w * y + y * y + z * z) * denom_f;
+		const float dL_dy = w * (w * x + x * x + z * z) * denom_f;
+		const float dL_dz = -2.f * w * z * (w + x + y) * denom_f;
+		dL_dc_xx = dL_dx;
+		dL_dc_yy = dL_dy;
+		dL_dc_xy = dL_dz;
 	}
-#endif
 	
 	float denom = c_xx * c_yy - c_xy * c_xy;
 
@@ -658,7 +662,8 @@ void BACKWARD::preprocess(
 	float* dL_dcov3D,
 	float* dL_dsh,
 	glm::vec3* dL_dscale,
-	glm::vec4* dL_drot)
+	glm::vec4* dL_drot,
+	bool antialiasing)
 {
 	// Propagate gradients for the path of 2D conic matrix computation. 
 	// Somewhat long, thus it is its own kernel rather than being part of 
@@ -679,7 +684,8 @@ void BACKWARD::preprocess(
 		dL_dopacity,
 		dL_dinvdepth,
 		(float3*)dL_dmean3D,
-		dL_dcov3D);
+		dL_dcov3D,
+		antialiasing);
 
 	// Propagate gradients for remaining steps: finish 3D mean gradients,
 	// propagate color gradients to SH (if desireD), propagate 3D covariance
diff --git a/cuda_rasterizer/backward.h b/cuda_rasterizer/backward.h
@@ -65,7 +65,8 @@ namespace BACKWARD
 		float* dL_dcov3D,
 		float* dL_dsh,
 		glm::vec3* dL_dscale,
-		glm::vec4* dL_drot);
+		glm::vec4* dL_drot,
+		bool antialiasing);
 }
 
 #endif
diff --git a/cuda_rasterizer/forward.cu b/cuda_rasterizer/forward.cu
@@ -173,7 +173,8 @@ __global__ void preprocessCUDA(int P, int D, int M,
 	float4* conic_opacity,
 	const dim3 grid,
 	uint32_t* tiles_touched,
-	bool prefiltered)
+	bool prefiltered,
+	bool antialiasing)
 {
 	auto idx = cg::this_grid().thread_rank();
 	if (idx >= P)
@@ -216,10 +217,10 @@ __global__ void preprocessCUDA(int P, int D, int M,
 	cov.x += h_var;
 	cov.z += h_var;
 	const float det_cov_plus_h_cov = cov.x * cov.z - cov.y * cov.y;
+	float h_convolution_scaling = 1.0f;
 
-#ifdef DGR_FIX_AA
-	const float h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
-#endif 
+	if(antialiasing)
+		h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
 
 	// Invert covariance (EWA algorithm)
 	const float det = det_cov_plus_h_cov;
@@ -260,11 +261,9 @@ __global__ void preprocessCUDA(int P, int D, int M,
 	// Inverse 2D covariance and opacity neatly pack into one float4
 	float opacity = opacities[idx];
 
-#ifdef DGR_FIX_AA
+
 	conic_opacity[idx] = { conic.x, conic.y, conic.z, opacity * h_convolution_scaling };
-#else
-	conic_opacity[idx] = { conic.x, conic.y, conic.z, opacity };
-#endif
+
 
 	tiles_touched[idx] = (rect_max.y - rect_min.y) * (rect_max.x - rect_min.x);
 }
@@ -451,7 +450,8 @@ void FORWARD::preprocess(int P, int D, int M,
 	float4* conic_opacity,
 	const dim3 grid,
 	uint32_t* tiles_touched,
-	bool prefiltered)
+	bool prefiltered,
+	bool antialiasing)
 {
 	preprocessCUDA<NUM_CHANNELS> << <(P + 255) / 256, 256 >> > (
 		P, D, M,
@@ -478,6 +478,7 @@ void FORWARD::preprocess(int P, int D, int M,
 		conic_opacity,
 		grid,
 		tiles_touched,
-		prefiltered
+		prefiltered,
+		antialiasing
 		);
 }
diff --git a/cuda_rasterizer/forward.h b/cuda_rasterizer/forward.h
@@ -45,7 +45,8 @@ namespace FORWARD
 		float4* conic_opacity,
 		const dim3 grid,
 		uint32_t* tiles_touched,
-		bool prefiltered);
+		bool prefiltered,
+		bool antialiasing);
 
 	// Main rasterization method.
 	void render(
diff --git a/cuda_rasterizer/rasterizer.h b/cuda_rasterizer/rasterizer.h
@@ -50,6 +50,7 @@ namespace CudaRasterizer
 			const bool prefiltered,
 			float* out_color,
 			float* depth,
+			bool antialiasing,
 			int* radii = nullptr,
 			bool debug = false);
 
@@ -85,6 +86,7 @@ namespace CudaRasterizer
 			float* dL_dsh,
 			float* dL_dscale,
 			float* dL_drot,
+			bool antialiasing,
 			bool debug);
 	};
 };
diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu
@@ -217,6 +217,7 @@ int CudaRasterizer::Rasterizer::forward(
 	const bool prefiltered,
 	float* out_color,
 	float* depth,
+	bool antialiasing,
 	int* radii,
 	bool debug)
 {
@@ -270,7 +271,8 @@ int CudaRasterizer::Rasterizer::forward(
 		geomState.conic_opacity,
 		tile_grid,
 		geomState.tiles_touched,
-		prefiltered
+		prefiltered,
+		antialiasing
 	), debug)
 
 	// Compute prefix sum over full list of touched tile counts by Gaussians
@@ -372,6 +374,7 @@ void CudaRasterizer::Rasterizer::backward(
 	float* dL_dsh,
 	float* dL_dscale,
 	float* dL_drot,
+	bool antialiasing,
 	bool debug)
 {
 	GeometryState geomState = GeometryState::fromChunk(geom_buffer, P);
@@ -442,5 +445,6 @@ void CudaRasterizer::Rasterizer::backward(
 		dL_dcov3D,
 		dL_dsh,
 		(glm::vec3*)dL_dscale,
-		(glm::vec4*)dL_drot), debug);
+		(glm::vec4*)dL_drot,
+		antialiasing), debug);
 }
diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py
@@ -76,6 +76,7 @@ def forward(
             raster_settings.sh_degree,
             raster_settings.campos,
             raster_settings.prefiltered,
+            raster_settings.antialiasing,
             raster_settings.debug
         )
 
@@ -119,6 +120,7 @@ def backward(ctx, grad_out_color, _, grad_out_depth):
                 num_rendered,
                 binningBuffer,
                 imgBuffer,
+                raster_settings.antialiasing,
                 raster_settings.debug)
 
         # Compute gradients for relevant tensors by invoking backward method
@@ -151,6 +153,7 @@ class GaussianRasterizationSettings(NamedTuple):
     campos : torch.Tensor
     prefiltered : bool
     debug : bool
+    antialiasing : bool
 
 class GaussianRasterizer(nn.Module):
     def __init__(self, raster_settings):
diff --git a/rasterize_points.cu b/rasterize_points.cu
@@ -52,6 +52,7 @@ RasterizeGaussiansCUDA(
 	const int degree,
 	const torch::Tensor& campos,
 	const bool prefiltered,
+	const bool antialiasing,
 	const bool debug)
 {
   if (means3D.ndimension() != 2 || means3D.size(1) != 3) {
@@ -115,6 +116,7 @@ RasterizeGaussiansCUDA(
 		prefiltered,
 		out_color.contiguous().data<float>(),
 		out_invdepthptr,
+		antialiasing,
 		radii.contiguous().data<int>(),
 		debug);
   }
@@ -145,7 +147,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
 	const int R,
 	const torch::Tensor& binningBuffer,
 	const torch::Tensor& imageBuffer,
-	const bool debug) 
+	const bool antialiasing,
+	const bool debug)
 {
   const int P = means3D.size(0);
   const int H = dL_dout_color.size(1);
@@ -212,6 +215,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
 	  dL_dsh.contiguous().data<float>(),
 	  dL_dscales.contiguous().data<float>(),
 	  dL_drotations.contiguous().data<float>(),
+	  antialiasing,
 	  debug);
   }
 
diff --git a/rasterize_points.h b/rasterize_points.h
@@ -35,6 +35,7 @@ RasterizeGaussiansCUDA(
 	const int degree,
 	const torch::Tensor& campos,
 	const bool prefiltered,
+	const bool antialiasing,
 	const bool debug);
 
 std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
@@ -61,6 +62,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
 	const int R,
 	const torch::Tensor& binningBuffer,
 	const torch::Tensor& imageBuffer,
+	const bool antialiasing,
 	const bool debug);
 		
 torch::Tensor markVisible(

Original file line number	Diff line number	Diff line change
`@@ -65,7 +65,8 @@ namespace BACKWARD`
`65`	`65`	`float* dL_dcov3D,`
`66`	`66`	`float* dL_dsh,`
`67`	`67`	`glm::vec3* dL_dscale,`
`68`		`- glm::vec4* dL_drot);`
	`68`	`+ glm::vec4* dL_drot,`
	`69`	`+ bool antialiasing);`
`69`	`70`	`}`
`70`	`71`
`71`	`72`	`#endif`