@@ -406,9 +406,11 @@ renderCUDA(
406
406
const float2 * __restrict__ points_xy_image,
407
407
const float4 * __restrict__ conic_opacity,
408
408
const float * __restrict__ colors,
409
+ const float * __restrict__ depths,
409
410
const float * __restrict__ final_Ts,
410
411
const uint32_t * __restrict__ n_contrib,
411
412
const float * __restrict__ dL_dpixels,
413
+ const float * __restrict__ dL_depths,
412
414
float3 * __restrict__ dL_dmean2D,
413
415
float4 * __restrict__ dL_dconic2D,
414
416
float * __restrict__ dL_dopacity,
@@ -435,6 +437,7 @@ renderCUDA(
435
437
__shared__ float2 collected_xy[BLOCK_SIZE];
436
438
__shared__ float4 collected_conic_opacity[BLOCK_SIZE];
437
439
__shared__ float collected_colors[C * BLOCK_SIZE];
440
+ __shared__ float collected_depths[BLOCK_SIZE];
438
441
439
442
// In the forward, we stored the final value for T, the
440
443
// product of all (1 - alpha) factors.
@@ -448,12 +451,16 @@ renderCUDA(
448
451
449
452
float accum_rec[C] = { 0 };
450
453
float dL_dpixel[C];
454
+ float dL_depth;
455
+ float accum_depth_rec = 0 ;
451
456
if (inside)
452
457
for (int i = 0 ; i < C; i++)
453
458
dL_dpixel[i] = dL_dpixels[i * H * W + pix_id];
459
+ dL_depth = dL_depths[pix_id];
454
460
455
461
float last_alpha = 0 ;
456
462
float last_color[C] = { 0 };
463
+ float last_depth = 0 ;
457
464
458
465
// Gradient of pixel coordinate w.r.t. normalized
459
466
// screen-space viewport coordinates (-1 to 1)
@@ -475,6 +482,7 @@ renderCUDA(
475
482
collected_conic_opacity[block.thread_rank ()] = conic_opacity[coll_id];
476
483
for (int i = 0 ; i < C; i++)
477
484
collected_colors[i * BLOCK_SIZE + block.thread_rank ()] = colors[coll_id * C + i];
485
+ collected_depths[block.thread_rank ()] = depths[coll_id];
478
486
}
479
487
block.sync ();
480
488
@@ -522,6 +530,10 @@ renderCUDA(
522
530
// many that were affected by this Gaussian.
523
531
atomicAdd (&(dL_dcolors[global_id * C + ch]), dchannel_dcolor * dL_dchannel);
524
532
}
533
+ const float c_d = collected_depths[j];
534
+ accum_depth_rec = last_alpha * last_depth + (1 .f - last_alpha) * accum_depth_rec;
535
+ last_depth = c_d;
536
+ dL_dalpha += (c_d - accum_depth_rec) * dL_depth;
525
537
dL_dalpha *= T;
526
538
// Update last alpha (to be used in the next iteration)
527
539
last_alpha = alpha;
@@ -630,9 +642,11 @@ void BACKWARD::render(
630
642
const float2 * means2D,
631
643
const float4 * conic_opacity,
632
644
const float * colors,
645
+ const float * depths,
633
646
const float * final_Ts,
634
647
const uint32_t * n_contrib,
635
648
const float * dL_dpixels,
649
+ const float * dL_depths,
636
650
float3 * dL_dmean2D,
637
651
float4 * dL_dconic2D,
638
652
float * dL_dopacity,
@@ -646,12 +660,14 @@ void BACKWARD::render(
646
660
means2D,
647
661
conic_opacity,
648
662
colors,
663
+ depths,
649
664
final_Ts,
650
665
n_contrib,
651
666
dL_dpixels,
667
+ dL_depths,
652
668
dL_dmean2D,
653
669
dL_dconic2D,
654
670
dL_dopacity,
655
671
dL_dcolors
656
672
);
657
- }
673
+ }
0 commit comments