Skip to content

Commit 9c5c202

Browse files
committed
toggle antialiasing
1 parent eb01570 commit 9c5c202

File tree

10 files changed

+71
-48
lines changed

10 files changed

+71
-48
lines changed

cuda_rasterizer/auxiliary.h

-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
#define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
1919
#define NUM_WARPS (BLOCK_SIZE/32)
20-
#define DGR_FIX_AA
2120
// Spherical harmonics coefficients
2221
__device__ const float SH_C0 = 0.28209479177387814f;
2322
__device__ const float SH_C1 = 0.4886025119029199f;

cuda_rasterizer/backward.cu

+38-32
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ __global__ void computeCov2DCUDA(int P,
156156
float* dL_dopacity,
157157
const float* dL_dinvdepth,
158158
float3* dL_dmeans,
159-
float* dL_dcov)
159+
float* dL_dcov,
160+
bool antialiasing)
160161
{
161162
auto idx = cg::this_grid().thread_rank();
162163
if (idx >= P || !(radii[idx] > 0))
@@ -205,41 +206,44 @@ __global__ void computeCov2DCUDA(int P,
205206
float c_yy = cov2D[1][1];
206207

207208
constexpr float h_var = 0.3f;
208-
#ifdef DGR_FIX_AA
209-
const float det_cov = c_xx * c_yy - c_xy * c_xy;
210-
c_xx += h_var;
211-
c_yy += h_var;
212-
const float det_cov_plus_h_cov = c_xx * c_yy - c_xy * c_xy;
213-
const float h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
214-
const float dL_dopacity_v = dL_dopacity[idx];
215-
const float d_h_convolution_scaling = dL_dopacity_v * opacities[idx];
216-
dL_dopacity[idx] = dL_dopacity_v * h_convolution_scaling;
217-
const float d_inside_root = (det_cov / det_cov_plus_h_cov) <= 0.000025f ? 0.f : d_h_convolution_scaling / (2 * h_convolution_scaling);
218-
#else
219-
c_xx += h_var;
220-
c_yy += h_var;
221-
#endif
209+
float d_inside_root = 0.f;
210+
if(antialiasing)
211+
{
212+
const float det_cov = c_xx * c_yy - c_xy * c_xy;
213+
c_xx += h_var;
214+
c_yy += h_var;
215+
const float det_cov_plus_h_cov = c_xx * c_yy - c_xy * c_xy;
216+
const float h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
217+
const float dL_dopacity_v = dL_dopacity[idx];
218+
const float d_h_convolution_scaling = dL_dopacity_v * opacities[idx];
219+
dL_dopacity[idx] = dL_dopacity_v * h_convolution_scaling;
220+
d_inside_root = (det_cov / det_cov_plus_h_cov) <= 0.000025f ? 0.f : d_h_convolution_scaling / (2 * h_convolution_scaling);
221+
}
222+
else
223+
{
224+
c_xx += h_var;
225+
c_yy += h_var;
226+
}
222227

223228
float dL_dc_xx = 0;
224229
float dL_dc_xy = 0;
225230
float dL_dc_yy = 0;
226-
#ifdef DGR_FIX_AA
231+
if(antialiasing)
227232
{
228-
// https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdx
229-
// https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdz
230-
const float x = c_xx;
231-
const float y = c_yy;
232-
const float z = c_xy;
233-
const float w = h_var;
234-
const float denom_f = d_inside_root / sq(w * w + w * (x + y) + x * y - z * z);
235-
const float dL_dx = w * (w * y + y * y + z * z) * denom_f;
236-
const float dL_dy = w * (w * x + x * x + z * z) * denom_f;
237-
const float dL_dz = -2.f * w * z * (w + x + y) * denom_f;
238-
dL_dc_xx = dL_dx;
239-
dL_dc_yy = dL_dy;
240-
dL_dc_xy = dL_dz;
233+
// https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdx
234+
// https://www.wolframalpha.com/input?i=d+%28%28x*y+-+z%5E2%29%2F%28%28x%2Bw%29*%28y%2Bw%29+-+z%5E2%29%29+%2Fdz
235+
const float x = c_xx;
236+
const float y = c_yy;
237+
const float z = c_xy;
238+
const float w = h_var;
239+
const float denom_f = d_inside_root / sq(w * w + w * (x + y) + x * y - z * z);
240+
const float dL_dx = w * (w * y + y * y + z * z) * denom_f;
241+
const float dL_dy = w * (w * x + x * x + z * z) * denom_f;
242+
const float dL_dz = -2.f * w * z * (w + x + y) * denom_f;
243+
dL_dc_xx = dL_dx;
244+
dL_dc_yy = dL_dy;
245+
dL_dc_xy = dL_dz;
241246
}
242-
#endif
243247

244248
float denom = c_xx * c_yy - c_xy * c_xy;
245249

@@ -658,7 +662,8 @@ void BACKWARD::preprocess(
658662
float* dL_dcov3D,
659663
float* dL_dsh,
660664
glm::vec3* dL_dscale,
661-
glm::vec4* dL_drot)
665+
glm::vec4* dL_drot,
666+
bool antialiasing)
662667
{
663668
// Propagate gradients for the path of 2D conic matrix computation.
664669
// Somewhat long, thus it is its own kernel rather than being part of
@@ -679,7 +684,8 @@ void BACKWARD::preprocess(
679684
dL_dopacity,
680685
dL_dinvdepth,
681686
(float3*)dL_dmean3D,
682-
dL_dcov3D);
687+
dL_dcov3D,
688+
antialiasing);
683689

684690
// Propagate gradients for remaining steps: finish 3D mean gradients,
685691
// propagate color gradients to SH (if desired), propagate 3D covariance

cuda_rasterizer/backward.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ namespace BACKWARD
6565
float* dL_dcov3D,
6666
float* dL_dsh,
6767
glm::vec3* dL_dscale,
68-
glm::vec4* dL_drot);
68+
glm::vec4* dL_drot,
69+
bool antialiasing);
6970
}
7071

7172
#endif

cuda_rasterizer/forward.cu

+11-10
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ __global__ void preprocessCUDA(int P, int D, int M,
173173
float4* conic_opacity,
174174
const dim3 grid,
175175
uint32_t* tiles_touched,
176-
bool prefiltered)
176+
bool prefiltered,
177+
bool antialiasing)
177178
{
178179
auto idx = cg::this_grid().thread_rank();
179180
if (idx >= P)
@@ -216,10 +217,10 @@ __global__ void preprocessCUDA(int P, int D, int M,
216217
cov.x += h_var;
217218
cov.z += h_var;
218219
const float det_cov_plus_h_cov = cov.x * cov.z - cov.y * cov.y;
220+
float h_convolution_scaling = 1.0f;
219221

220-
#ifdef DGR_FIX_AA
221-
const float h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
222-
#endif
222+
if(antialiasing)
223+
h_convolution_scaling = sqrt(max(0.000025f, det_cov / det_cov_plus_h_cov)); // max for numerical stability
223224

224225
// Invert covariance (EWA algorithm)
225226
const float det = det_cov_plus_h_cov;
@@ -260,11 +261,9 @@ __global__ void preprocessCUDA(int P, int D, int M,
260261
// Inverse 2D covariance and opacity neatly pack into one float4
261262
float opacity = opacities[idx];
262263

263-
#ifdef DGR_FIX_AA
264+
264265
conic_opacity[idx] = { conic.x, conic.y, conic.z, opacity * h_convolution_scaling };
265-
#else
266-
conic_opacity[idx] = { conic.x, conic.y, conic.z, opacity };
267-
#endif
266+
268267

269268
tiles_touched[idx] = (rect_max.y - rect_min.y) * (rect_max.x - rect_min.x);
270269
}
@@ -451,7 +450,8 @@ void FORWARD::preprocess(int P, int D, int M,
451450
float4* conic_opacity,
452451
const dim3 grid,
453452
uint32_t* tiles_touched,
454-
bool prefiltered)
453+
bool prefiltered,
454+
bool antialiasing)
455455
{
456456
preprocessCUDA<NUM_CHANNELS> << <(P + 255) / 256, 256 >> > (
457457
P, D, M,
@@ -478,6 +478,7 @@ void FORWARD::preprocess(int P, int D, int M,
478478
conic_opacity,
479479
grid,
480480
tiles_touched,
481-
prefiltered
481+
prefiltered,
482+
antialiasing
482483
);
483484
}

cuda_rasterizer/forward.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ namespace FORWARD
4545
float4* conic_opacity,
4646
const dim3 grid,
4747
uint32_t* tiles_touched,
48-
bool prefiltered);
48+
bool prefiltered,
49+
bool antialiasing);
4950

5051
// Main rasterization method.
5152
void render(

cuda_rasterizer/rasterizer.h

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ namespace CudaRasterizer
5050
const bool prefiltered,
5151
float* out_color,
5252
float* depth,
53+
bool antialiasing,
5354
int* radii = nullptr,
5455
bool debug = false);
5556

@@ -85,6 +86,7 @@ namespace CudaRasterizer
8586
float* dL_dsh,
8687
float* dL_dscale,
8788
float* dL_drot,
89+
bool antialiasing,
8890
bool debug);
8991
};
9092
};

cuda_rasterizer/rasterizer_impl.cu

+6-2
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ int CudaRasterizer::Rasterizer::forward(
217217
const bool prefiltered,
218218
float* out_color,
219219
float* depth,
220+
bool antialiasing,
220221
int* radii,
221222
bool debug)
222223
{
@@ -270,7 +271,8 @@ int CudaRasterizer::Rasterizer::forward(
270271
geomState.conic_opacity,
271272
tile_grid,
272273
geomState.tiles_touched,
273-
prefiltered
274+
prefiltered,
275+
antialiasing
274276
), debug)
275277

276278
// Compute prefix sum over full list of touched tile counts by Gaussians
@@ -372,6 +374,7 @@ void CudaRasterizer::Rasterizer::backward(
372374
float* dL_dsh,
373375
float* dL_dscale,
374376
float* dL_drot,
377+
bool antialiasing,
375378
bool debug)
376379
{
377380
GeometryState geomState = GeometryState::fromChunk(geom_buffer, P);
@@ -442,5 +445,6 @@ void CudaRasterizer::Rasterizer::backward(
442445
dL_dcov3D,
443446
dL_dsh,
444447
(glm::vec3*)dL_dscale,
445-
(glm::vec4*)dL_drot), debug);
448+
(glm::vec4*)dL_drot,
449+
antialiasing), debug);
446450
}

diff_gaussian_rasterization/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def forward(
7676
raster_settings.sh_degree,
7777
raster_settings.campos,
7878
raster_settings.prefiltered,
79+
raster_settings.antialiasing,
7980
raster_settings.debug
8081
)
8182

@@ -119,6 +120,7 @@ def backward(ctx, grad_out_color, _, grad_out_depth):
119120
num_rendered,
120121
binningBuffer,
121122
imgBuffer,
123+
raster_settings.antialiasing,
122124
raster_settings.debug)
123125

124126
# Compute gradients for relevant tensors by invoking backward method
@@ -151,6 +153,7 @@ class GaussianRasterizationSettings(NamedTuple):
151153
campos : torch.Tensor
152154
prefiltered : bool
153155
debug : bool
156+
antialiasing : bool
154157

155158
class GaussianRasterizer(nn.Module):
156159
def __init__(self, raster_settings):

rasterize_points.cu

+5-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ RasterizeGaussiansCUDA(
5252
const int degree,
5353
const torch::Tensor& campos,
5454
const bool prefiltered,
55+
const bool antialiasing,
5556
const bool debug)
5657
{
5758
if (means3D.ndimension() != 2 || means3D.size(1) != 3) {
@@ -115,6 +116,7 @@ RasterizeGaussiansCUDA(
115116
prefiltered,
116117
out_color.contiguous().data<float>(),
117118
out_invdepthptr,
119+
antialiasing,
118120
radii.contiguous().data<int>(),
119121
debug);
120122
}
@@ -145,7 +147,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
145147
const int R,
146148
const torch::Tensor& binningBuffer,
147149
const torch::Tensor& imageBuffer,
148-
const bool debug)
150+
const bool antialiasing,
151+
const bool debug)
149152
{
150153
const int P = means3D.size(0);
151154
const int H = dL_dout_color.size(1);
@@ -212,6 +215,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
212215
dL_dsh.contiguous().data<float>(),
213216
dL_dscales.contiguous().data<float>(),
214217
dL_drotations.contiguous().data<float>(),
218+
antialiasing,
215219
debug);
216220
}
217221

rasterize_points.h

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ RasterizeGaussiansCUDA(
3535
const int degree,
3636
const torch::Tensor& campos,
3737
const bool prefiltered,
38+
const bool antialiasing,
3839
const bool debug);
3940

4041
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
@@ -61,6 +62,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
6162
const int R,
6263
const torch::Tensor& binningBuffer,
6364
const torch::Tensor& imageBuffer,
65+
const bool antialiasing,
6466
const bool debug);
6567

6668
torch::Tensor markVisible(

0 commit comments

Comments
 (0)