Skip to content

Commit 00b169c

Browse files
committed
Updating CUDA extension and removing CUFFT callbacks
1 parent b75c0ce commit 00b169c

File tree

4 files changed

+622
-382
lines changed

4 files changed

+622
-382
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ if(CMAKE_CUDA_COMPILER)
4040
OPTIONAL_COMPONENTS Development.SABIModule)
4141
execute_process(
4242
COMMAND "${Python_EXECUTABLE}" "-c"
43-
"from jax.extend import ffi; print(ffi.include_dir())"
43+
"from jax import ffi; print(ffi.include_dir())"
4444
OUTPUT_STRIP_TRAILING_WHITESPACE
4545
OUTPUT_VARIABLE XLA_DIR)
4646
message(STATUS "XLA include directory: ${XLA_DIR}")

lib/include/s2fft_callbacks.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
/**
2+
* @file s2fft_callbacks.h
3+
* @brief CUDA cuFFT callbacks for HEALPix spherical harmonic transforms
4+
*
5+
* @note CUFFT CALLBACKS DEPRECATED: This implementation no longer uses cuFFT callbacks.
6+
* The previous callback-based approach has been replaced with direct kernel launches
7+
* for better performance and maintainability. The files s2fft_callbacks.h and
8+
* s2fft_callbacks.cc are no longer used and can be considered orphaned.
9+
*/
10+
11+
112
#ifndef _S2FFT_CALLBACKS_CUH_
213
#define _S2FFT_CALLBACKS_CUH_
314

lib/include/s2fft_kernels.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,77 @@
99
#include <cufftXt.h>
1010
typedef long long int int64;
1111

12+
/**
13+
* @file s2fft_kernels.h
14+
* @brief CUDA kernels for HEALPix spherical harmonic transforms
15+
*
16+
* @note CUFFT CALLBACKS DEPRECATED: This implementation no longer uses cuFFT callbacks.
17+
* The previous callback-based approach has been replaced with direct kernel launches
18+
* for better performance and maintainability. The files s2fft_callbacks.h and
19+
* s2fft_callbacks.cc are no longer used and can be considered orphaned.
20+
*/
21+
1222
namespace s2fftKernels {
1323

1424
enum fft_norm { FORWARD = 1, BACKWARD = 2, ORTHO = 3, NONE = 4 };
1525

26+
/**
27+
* @brief Launches the spectral folding CUDA kernel.
28+
*
29+
* This function configures and launches the spectral_folding kernel with
30+
* appropriate grid and block dimensions. It performs spectral folding operations
31+
* on ring-ordered data, transforming from Fourier coefficient space to HEALPix
32+
* pixel space with optional FFT shifting.
33+
*
34+
* @tparam complex The complex type (cufftComplex or cufftDoubleComplex).
35+
* @param data Input data array containing Fourier coefficients per ring.
36+
* @param output Output array for folded HEALPix pixel data.
37+
* @param nside The HEALPix Nside parameter.
38+
* @param L The harmonic band limit.
39+
* @param shift Flag indicating whether to apply FFT shifting.
40+
* @param stream CUDA stream for kernel execution.
41+
* @return HRESULT indicating success or failure.
42+
*/
1643
template <typename complex>
1744
HRESULT launch_spectral_folding(complex* data, complex* output, const int& nside, const int& L,
1845
const bool& shift, cudaStream_t stream);
46+
47+
/**
48+
* @brief Launches the spectral extension CUDA kernel.
49+
*
50+
* This function configures and launches the spectral_extension kernel with
51+
* appropriate grid and block dimensions. It performs the inverse operation of
52+
* spectral folding, extending HEALPix pixel data back to full Fourier coefficient
53+
* space by mapping folded frequency components to their appropriate positions.
54+
*
55+
* @tparam complex The complex type (cufftComplex or cufftDoubleComplex).
56+
* @param data Input array containing folded HEALPix pixel data.
57+
* @param output Output array for extended Fourier coefficients per ring.
58+
* @param nside The HEALPix Nside parameter.
59+
* @param L The harmonic band limit.
60+
* @param stream CUDA stream for kernel execution.
61+
* @return HRESULT indicating success or failure.
62+
*/
1963
template <typename complex>
2064
HRESULT launch_spectral_extension(complex* data, complex* output, const int& nside, const int& L,
2165
cudaStream_t stream);
2266

67+
/**
68+
* @brief Launches the shift/normalize CUDA kernel for HEALPix data processing.
69+
*
70+
* This function configures and launches the shift_normalize_kernel with appropriate
71+
* grid and block dimensions. It handles both single and double precision complex
72+
* types and applies the requested normalization and shifting operations to HEALPix
73+
* pixel data on a per-ring basis.
74+
*
75+
* @tparam complex The complex type (cufftComplex or cufftDoubleComplex).
76+
* @param stream CUDA stream for kernel execution.
77+
* @param data Input/output array of HEALPix pixel data (in-place processing).
78+
* @param nside The HEALPix Nside parameter.
79+
* @param apply_shift Flag indicating whether to apply FFT shifting.
80+
* @param norm Normalization type (0=by nphi, 1=by sqrt(nphi), 2=no normalization).
81+
* @return HRESULT indicating success or failure.
82+
*/
2383
template <typename complex>
2484
HRESULT launch_shift_normalize_kernel(cudaStream_t stream,
2585
complex* data, // In-place data buffer

0 commit comments

Comments
 (0)