Skip to content

Commit 73917be

Browse files
author
bkerbl
committed
Debug functionality
1 parent f6f13c6 commit 73917be

File tree

6 files changed

+74
-31
lines changed

6 files changed

+74
-31
lines changed

cuda_rasterizer/auxiliary.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,4 +163,13 @@ __forceinline__ __device__ bool in_frustum(int idx,
163163
return true;
164164
}
165165

166+
#define CHECK_CUDA(A, debug) \
167+
A; if(debug) { \
168+
auto ret = cudaDeviceSynchronize(); \
169+
if (ret != cudaSuccess) { \
170+
std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \
171+
throw std::runtime_error(cudaGetErrorString(ret)); \
172+
} \
173+
}
174+
166175
#endif

cuda_rasterizer/rasterizer.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ namespace CudaRasterizer
4949
const float tan_fovx, float tan_fovy,
5050
const bool prefiltered,
5151
float* out_color,
52-
int* radii = nullptr);
52+
int* radii = nullptr,
53+
bool debug = false);
5354

5455
static void backward(
5556
const int P, int D, int M, int R,
@@ -79,7 +80,8 @@ namespace CudaRasterizer
7980
float* dL_dcov3D,
8081
float* dL_dsh,
8182
float* dL_dscale,
82-
float* dL_drot);
83+
float* dL_drot,
84+
bool debug);
8385
};
8486
};
8587

cuda_rasterizer/rasterizer_impl.cu

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ int CudaRasterizer::Rasterizer::forward(
216216
const float tan_fovx, float tan_fovy,
217217
const bool prefiltered,
218218
float* out_color,
219-
int* radii)
219+
int* radii,
220+
bool debug)
220221
{
221222
const float focal_y = height / (2.0f * tan_fovy);
222223
const float focal_x = width / (2.0f * tan_fovx);
@@ -244,7 +245,7 @@ int CudaRasterizer::Rasterizer::forward(
244245
}
245246

246247
// Run preprocessing per-Gaussian (transformation, bounding, conversion of SHs to RGB)
247-
FORWARD::preprocess(
248+
CHECK_CUDA(FORWARD::preprocess(
248249
P, D, M,
249250
means3D,
250251
(glm::vec3*)scales,
@@ -269,16 +270,15 @@ int CudaRasterizer::Rasterizer::forward(
269270
tile_grid,
270271
geomState.tiles_touched,
271272
prefiltered
272-
);
273+
), debug)
273274

274275
// Compute prefix sum over full list of touched tile counts by Gaussians
275276
// E.g., [2, 3, 0, 2, 1] -> [2, 5, 5, 7, 8]
276-
cub::DeviceScan::InclusiveSum(geomState.scanning_space, geomState.scan_size,
277-
geomState.tiles_touched, geomState.point_offsets, P);
277+
CHECK_CUDA(cub::DeviceScan::InclusiveSum(geomState.scanning_space, geomState.scan_size, geomState.tiles_touched, geomState.point_offsets, P), debug)
278278

279279
// Retrieve total number of Gaussian instances to launch and resize aux buffers
280280
int num_rendered;
281-
cudaMemcpy(&num_rendered, geomState.point_offsets + P - 1, sizeof(int), cudaMemcpyDeviceToHost);
281+
CHECK_CUDA(cudaMemcpy(&num_rendered, geomState.point_offsets + P - 1, sizeof(int), cudaMemcpyDeviceToHost), debug);
282282

283283
size_t binning_chunk_size = required<BinningState>(num_rendered);
284284
char* binning_chunkptr = binningBuffer(binning_chunk_size);
@@ -294,32 +294,32 @@ int CudaRasterizer::Rasterizer::forward(
294294
binningState.point_list_keys_unsorted,
295295
binningState.point_list_unsorted,
296296
radii,
297-
tile_grid
298-
);
297+
tile_grid)
298+
CHECK_CUDA(, debug)
299299

300300
int bit = getHigherMsb(tile_grid.x * tile_grid.y);
301301

302302
// Sort complete list of (duplicated) Gaussian indices by keys
303-
cub::DeviceRadixSort::SortPairs(
303+
CHECK_CUDA(cub::DeviceRadixSort::SortPairs(
304304
binningState.list_sorting_space,
305305
binningState.sorting_size,
306306
binningState.point_list_keys_unsorted, binningState.point_list_keys,
307307
binningState.point_list_unsorted, binningState.point_list,
308-
num_rendered, 0, 32 + bit);
308+
num_rendered, 0, 32 + bit), debug)
309309

310-
cudaMemset(imgState.ranges, 0, tile_grid.x * tile_grid.y * sizeof(uint2));
310+
CHECK_CUDA(cudaMemset(imgState.ranges, 0, tile_grid.x * tile_grid.y * sizeof(uint2)), debug);
311311

312312
// Identify start and end of per-tile workloads in sorted list
313313
if (num_rendered > 0)
314314
identifyTileRanges << <(num_rendered + 255) / 256, 256 >> > (
315315
num_rendered,
316316
binningState.point_list_keys,
317-
imgState.ranges
318-
);
317+
imgState.ranges);
318+
CHECK_CUDA(, debug)
319319

320320
// Let each tile blend its range of Gaussians independently in parallel
321321
const float* feature_ptr = colors_precomp != nullptr ? colors_precomp : geomState.rgb;
322-
FORWARD::render(
322+
CHECK_CUDA(FORWARD::render(
323323
tile_grid, block,
324324
imgState.ranges,
325325
binningState.point_list,
@@ -330,7 +330,7 @@ int CudaRasterizer::Rasterizer::forward(
330330
imgState.accum_alpha,
331331
imgState.n_contrib,
332332
background,
333-
out_color);
333+
out_color), debug)
334334

335335
return num_rendered;
336336
}
@@ -365,7 +365,8 @@ void CudaRasterizer::Rasterizer::backward(
365365
float* dL_dcov3D,
366366
float* dL_dsh,
367367
float* dL_dscale,
368-
float* dL_drot)
368+
float* dL_drot,
369+
bool debug)
369370
{
370371
GeometryState geomState = GeometryState::fromChunk(geom_buffer, P);
371372
BinningState binningState = BinningState::fromChunk(binning_buffer, R);
@@ -386,7 +387,7 @@ void CudaRasterizer::Rasterizer::backward(
386387
// opacity and RGB of Gaussians from per-pixel loss gradients.
387388
// If we were given precomputed colors and not SHs, use them.
388389
const float* color_ptr = (colors_precomp != nullptr) ? colors_precomp : geomState.rgb;
389-
BACKWARD::render(
390+
CHECK_CUDA(BACKWARD::render(
390391
tile_grid,
391392
block,
392393
imgState.ranges,
@@ -402,13 +403,13 @@ void CudaRasterizer::Rasterizer::backward(
402403
(float3*)dL_dmean2D,
403404
(float4*)dL_dconic,
404405
dL_dopacity,
405-
dL_dcolor);
406+
dL_dcolor), debug)
406407

407408
// Take care of the rest of preprocessing. Was the precomputed covariance
408409
// given to us or a scales/rot pair? If precomputed, pass that. If not,
409410
// use the one we computed ourselves.
410411
const float* cov3D_ptr = (cov3D_precomp != nullptr) ? cov3D_precomp : geomState.cov3D;
411-
BACKWARD::preprocess(P, D, M,
412+
CHECK_CUDA(BACKWARD::preprocess(P, D, M,
412413
(float3*)means3D,
413414
radii,
414415
shs,
@@ -429,5 +430,5 @@ void CudaRasterizer::Rasterizer::backward(
429430
dL_dcov3D,
430431
dL_dsh,
431432
(glm::vec3*)dL_dscale,
432-
(glm::vec4*)dL_drot);
433+
(glm::vec4*)dL_drot), debug)
433434
}

diff_gaussian_rasterization/__init__.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
import torch
1515
from . import _C
1616

17+
def cpu_deep_copy_tuple(input_tuple):
18+
copied_tensors = [item.cpu().clone() if isinstance(item, torch.Tensor) else item for item in input_tuple]
19+
return tuple(copied_tensors)
20+
1721
def rasterize_gaussians(
1822
means3D,
1923
means2D,
@@ -72,10 +76,20 @@ def forward(
7276
raster_settings.sh_degree,
7377
raster_settings.campos,
7478
raster_settings.prefiltered,
79+
raster_settings.debug
7580
)
7681

7782
# Invoke C++/CUDA rasterizer
78-
num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)
83+
if raster_settings.debug:
84+
cpu_args = cpu_deep_copy_tuple(args) # Copy them before they can be corrupted
85+
try:
86+
num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)
87+
except Exception as ex:
88+
torch.save(cpu_args, "snapshot_fw.dump")
89+
print("\nAn error occured in forward. Please forward snapshot_fw.dump for debugging.")
90+
raise ex
91+
else:
92+
num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args)
7993

8094
# Keep relevant tensors for backward
8195
ctx.raster_settings = raster_settings
@@ -111,10 +125,20 @@ def backward(ctx, grad_out_color, _):
111125
geomBuffer,
112126
num_rendered,
113127
binningBuffer,
114-
imgBuffer)
128+
imgBuffer,
129+
raster_settings.debug)
115130

116131
# Compute gradients for relevant tensors by invoking backward method
117-
grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)
132+
if raster_settings.debug:
133+
cpu_args = cpu_deep_copy_tuple(args) # Copy them before they can be corrupted
134+
try:
135+
grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)
136+
except Exception as ex:
137+
print("\nAn error occured in backward. Writing snapshot_bw.dump for debugging.\n")
138+
torch.save(cpu_args, "snapshot_bw.dump")
139+
raise ex
140+
else:
141+
grad_means2D, grad_colors_precomp, grad_opacities, grad_means3D, grad_cov3Ds_precomp, grad_sh, grad_scales, grad_rotations = _C.rasterize_gaussians_backward(*args)
118142

119143
grads = (
120144
grad_means3D,
@@ -142,6 +166,7 @@ class GaussianRasterizationSettings(NamedTuple):
142166
sh_degree : int
143167
campos : torch.Tensor
144168
prefiltered : bool
169+
debug : bool
145170

146171
class GaussianRasterizer(nn.Module):
147172
def __init__(self, raster_settings):

rasterize_points.cu

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ RasterizeGaussiansCUDA(
5151
const torch::Tensor& sh,
5252
const int degree,
5353
const torch::Tensor& campos,
54-
const bool prefiltered)
54+
const bool prefiltered,
55+
const bool debug)
5556
{
5657
if (means3D.ndimension() != 2 || means3D.size(1) != 3) {
5758
AT_ERROR("means3D must have dimensions (num_points, 3)");
@@ -107,7 +108,8 @@ RasterizeGaussiansCUDA(
107108
tan_fovy,
108109
prefiltered,
109110
out_color.contiguous().data<float>(),
110-
radii.contiguous().data<int>());
111+
radii.contiguous().data<int>(),
112+
debug);
111113
}
112114
return std::make_tuple(rendered, out_color, radii, geomBuffer, binningBuffer, imgBuffer);
113115
}
@@ -133,7 +135,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
133135
const torch::Tensor& geomBuffer,
134136
const int R,
135137
const torch::Tensor& binningBuffer,
136-
const torch::Tensor& imageBuffer)
138+
const torch::Tensor& imageBuffer,
139+
const bool debug)
137140
{
138141
const int P = means3D.size(0);
139142
const int H = dL_dout_color.size(1);
@@ -185,7 +188,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
185188
dL_dcov3D.contiguous().data<float>(),
186189
dL_dsh.contiguous().data<float>(),
187190
dL_dscales.contiguous().data<float>(),
188-
dL_drotations.contiguous().data<float>());
191+
dL_drotations.contiguous().data<float>(),
192+
debug);
189193
}
190194

191195
return std::make_tuple(dL_dmeans2D, dL_dcolors, dL_dopacity, dL_dmeans3D, dL_dcov3D, dL_dsh, dL_dscales, dL_drotations);

rasterize_points.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ RasterizeGaussiansCUDA(
3434
const torch::Tensor& sh,
3535
const int degree,
3636
const torch::Tensor& campos,
37-
const bool prefiltered);
37+
const bool prefiltered,
38+
const bool debug);
3839

3940
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
4041
RasterizeGaussiansBackwardCUDA(
@@ -57,7 +58,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Te
5758
const torch::Tensor& geomBuffer,
5859
const int R,
5960
const torch::Tensor& binningBuffer,
60-
const torch::Tensor& imageBuffer);
61+
const torch::Tensor& imageBuffer,
62+
const bool debug);
6163

6264
torch::Tensor markVisible(
6365
torch::Tensor& means3D,

0 commit comments

Comments
 (0)