@@ -353,11 +353,13 @@ __global__ void preprocessCUDA(
353353 const glm::vec3* scales,
354354 const glm::vec4* rotations,
355355 const float scale_modifier,
356+ const float * view,
356357 const float * proj,
357358 const glm::vec3* campos,
358359 const float3 * dL_dmean2D,
359360 glm::vec3* dL_dmeans,
360361 float * dL_dcolor,
362+ float * dL_ddepth,
361363 float * dL_dcov3D,
362364 float * dL_dsh,
363365 glm::vec3* dL_dscale,
@@ -386,6 +388,20 @@ __global__ void preprocessCUDA(
386388 // of cov2D and following SH conversion also affects it.
387389 dL_dmeans[idx] += dL_dmean;
388390
391+ // The homogeneous coordinate w must equal 1 when computing view^T * [x, y, z, 1].
392+ float3 m_view = transformPoint4x3 (m, view);
393+
394+ // Compute loss gradient w.r.t. 3D means due to gradients of depth
395+ // from rendering procedure
396+ glm::vec3 dL_dmean2;
397+ float mul3 = view[2 ] * m.x + view[6 ] * m.y + view[10 ] * m.z + view[14 ];
398+ dL_dmean2.x = (view[2 ] - view[3 ] * mul3) * dL_ddepth[idx];
399+ dL_dmean2.y = (view[6 ] - view[7 ] * mul3) * dL_ddepth[idx];
400+ dL_dmean2.z = (view[10 ] - view[11 ] * mul3) * dL_ddepth[idx];
401+
402+ // This is the third contribution to the mean gradient (the depth term computed above).
403+ dL_dmeans[idx] += dL_dmean2;
404+
389405 // Compute gradient updates due to computing colors from SHs
390406 if (shs)
391407 computeColorFromSH (idx, D, M, (glm::vec3*)means, *campos, shs, clamped, (glm::vec3*)dL_dcolor, (glm::vec3*)dL_dmeans, (glm::vec3*)dL_dsh);
@@ -410,11 +426,12 @@ renderCUDA(
410426 const float * __restrict__ final_Ts,
411427 const uint32_t * __restrict__ n_contrib,
412428 const float * __restrict__ dL_dpixels,
413- const float * __restrict__ dL_depths ,
429+ const float * __restrict__ dL_dpixel_depths ,
414430 float3 * __restrict__ dL_dmean2D,
415431 float4 * __restrict__ dL_dconic2D,
416432 float * __restrict__ dL_dopacity,
417- float * __restrict__ dL_dcolors)
433+ float * __restrict__ dL_dcolors,
434+ float * __restrict__ dL_ddepths)
418435{
419436 // We rasterize again. Compute necessary block info.
420437 auto block = cg::this_thread_block ();
@@ -451,12 +468,12 @@ renderCUDA(
451468
452469 float accum_rec[C] = { 0 };
453470 float dL_dpixel[C];
454- float dL_depth ;
471+ float dL_dpixel_depth ;
455472 float accum_depth_rec = 0 ;
456473 if (inside){
457474 for (int i = 0 ; i < C; i++)
458475 dL_dpixel[i] = dL_dpixels[i * H * W + pix_id];
459- dL_depth = dL_depths [pix_id];
476+ dL_dpixel_depth = dL_dpixel_depths [pix_id];
460477 }
461478
462479 float last_alpha = 0 ;
@@ -483,7 +500,7 @@ renderCUDA(
483500 collected_conic_opacity[block.thread_rank ()] = conic_opacity[coll_id];
484501 for (int i = 0 ; i < C; i++)
485502 collected_colors[i * BLOCK_SIZE + block.thread_rank ()] = colors[coll_id * C + i];
486- collected_depths[block.thread_rank ()] = depths[coll_id];
503+ collected_depths[block.thread_rank ()] = depths[coll_id];
487504 }
488505 block.sync ();
489506
@@ -511,6 +528,7 @@ renderCUDA(
511528
512529 T = T / (1 .f - alpha);
513530 const float dchannel_dcolor = alpha * T;
531+ const float dpixel_depth_ddepth = alpha * T;
514532
515533 // Propagate gradients to per-Gaussian colors and keep
516534 // gradients w.r.t. alpha (blending factor for a Gaussian/pixel
@@ -534,7 +552,9 @@ renderCUDA(
534552 const float c_d = collected_depths[j];
535553 accum_depth_rec = last_alpha * last_depth + (1 .f - last_alpha) * accum_depth_rec;
536554 last_depth = c_d;
537- dL_dalpha += (c_d - accum_depth_rec) * dL_depth;
555+ dL_dalpha += (c_d - accum_depth_rec) * dL_dpixel_depth;
556+ atomicAdd (&(dL_ddepths[global_id]), dpixel_depth_ddepth * dL_dpixel_depth);
557+
538558 dL_dalpha *= T;
539559 // Update last alpha (to be used in the next iteration)
540560 last_alpha = alpha;
@@ -588,6 +608,7 @@ void BACKWARD::preprocess(
588608 const float * dL_dconic,
589609 glm::vec3* dL_dmean3D,
590610 float * dL_dcolor,
611+ float * dL_ddepth,
591612 float * dL_dcov3D,
592613 float * dL_dsh,
593614 glm::vec3* dL_dscale,
@@ -623,11 +644,13 @@ void BACKWARD::preprocess(
623644 (glm::vec3*)scales,
624645 (glm::vec4*)rotations,
625646 scale_modifier,
647+ viewmatrix,
626648 projmatrix,
627649 campos,
628650 (float3 *)dL_dmean2D,
629651 (glm::vec3*)dL_dmean3D,
630652 dL_dcolor,
653+ dL_ddepth,
631654 dL_dcov3D,
632655 dL_dsh,
633656 dL_dscale,
@@ -647,11 +670,12 @@ void BACKWARD::render(
647670 const float * final_Ts,
648671 const uint32_t * n_contrib,
649672 const float * dL_dpixels,
650- const float * dL_depths ,
673+ const float * dL_dpixel_depths ,
651674 float3 * dL_dmean2D,
652675 float4 * dL_dconic2D,
653676 float * dL_dopacity,
654- float * dL_dcolors)
677+ float * dL_dcolors,
678+ float * dL_ddepths)
655679{
656680 renderCUDA<NUM_CHANNELS> << <grid, block >> >(
657681 ranges,
@@ -665,10 +689,11 @@ void BACKWARD::render(
665689 final_Ts,
666690 n_contrib,
667691 dL_dpixels,
668- dL_depths ,
692+ dL_dpixel_depths ,
669693 dL_dmean2D,
670694 dL_dconic2D,
671695 dL_dopacity,
672- dL_dcolors
696+ dL_dcolors,
697+ dL_ddepths
673698 );
674699}
0 commit comments