|
9 | 9 | #include <cufftXt.h>
|
10 | 10 | typedef long long int int64;
|
11 | 11 |
|
| 12 | +/** |
| 13 | + * @file s2fft_kernels.h |
| 14 | + * @brief CUDA kernels for HEALPix spherical harmonic transforms |
| 15 | + * |
| 16 | + * @note CUFFT CALLBACKS DEPRECATED: This implementation no longer uses cuFFT callbacks. |
| 17 | + * The previous callback-based approach has been replaced with direct kernel launches |
| 18 | + * for better performance and maintainability. The files s2fft_callbacks.h and |
| 19 | + * s2fft_callbacks.cc are no longer used and can be considered orphaned. |
| 20 | + */ |
| 21 | + |
12 | 22 | namespace s2fftKernels {
|
13 | 23 |
|
14 | 24 | enum fft_norm { FORWARD = 1, BACKWARD = 2, ORTHO = 3, NONE = 4 };
|
15 | 25 |
|
| 26 | +/** |
| 27 | + * @brief Launches the spectral folding CUDA kernel. |
| 28 | + * |
| 29 | + * This function configures and launches the spectral_folding kernel with |
| 30 | + * appropriate grid and block dimensions. It performs spectral folding operations |
| 31 | + * on ring-ordered data, transforming from Fourier coefficient space to HEALPix |
| 32 | + * pixel space with optional FFT shifting. |
| 33 | + * Only the declaration appears here; the definition is provided elsewhere. |
| 34 | + * @tparam complex The complex type (cufftComplex or cufftDoubleComplex). |
| 35 | + * @param data Input data array containing Fourier coefficients per ring; taken as a non-const pointer (NOTE(review): presumably a read-only device buffer -- confirm). |
| 36 | + * @param output Output array for folded HEALPix pixel data (NOTE(review): presumably a device buffer distinct from data -- confirm). |
| 37 | + * @param nside The HEALPix Nside parameter (passed by const reference). |
| 38 | + * @param L The harmonic band limit (passed by const reference). |
| 39 | + * @param shift Flag indicating whether to apply FFT shifting (passed by const reference). |
| 40 | + * @param stream CUDA stream for kernel execution (passed by value). |
| 41 | + * @return HRESULT indicating success or failure (HRESULT is not declared in the visible part of this header). |
| 42 | + */ |
16 | 43 | template <typename complex>
|
17 | 44 | HRESULT launch_spectral_folding(complex* data, complex* output, const int& nside, const int& L,
|
18 | 45 | const bool& shift, cudaStream_t stream);
|
| 46 | + |
| 47 | +/** |
| 48 | + * @brief Launches the spectral extension CUDA kernel. |
| 49 | + * |
| 50 | + * This function configures and launches the spectral_extension kernel with |
| 51 | + * appropriate grid and block dimensions. It performs the inverse operation of |
| 52 | + * spectral folding, extending HEALPix pixel data back to full Fourier coefficient |
| 53 | + * space by mapping folded frequency components to their appropriate positions. |
| 54 | + * Only the declaration appears here; the definition is provided elsewhere. Unlike launch_spectral_folding, this signature has no shift flag. |
| 55 | + * @tparam complex The complex type (cufftComplex or cufftDoubleComplex). |
| 56 | + * @param data Input array containing folded HEALPix pixel data; taken as a non-const pointer (NOTE(review): presumably a read-only device buffer -- confirm). |
| 57 | + * @param output Output array for extended Fourier coefficients per ring (NOTE(review): presumably a device buffer distinct from data -- confirm). |
| 58 | + * @param nside The HEALPix Nside parameter (passed by const reference). |
| 59 | + * @param L The harmonic band limit (passed by const reference). |
| 60 | + * @param stream CUDA stream for kernel execution (passed by value). |
| 61 | + * @return HRESULT indicating success or failure (HRESULT is not declared in the visible part of this header). |
| 62 | + */ |
19 | 63 | template <typename complex>
|
20 | 64 | HRESULT launch_spectral_extension(complex* data, complex* output, const int& nside, const int& L,
|
21 | 65 | cudaStream_t stream);
|
22 | 66 |
|
| 67 | +/** |
| 68 | + * @brief Launches the shift/normalize CUDA kernel for HEALPix data processing. |
| 69 | + * |
| 70 | + * This function configures and launches the shift_normalize_kernel with appropriate |
| 71 | + * grid and block dimensions. It handles both single and double precision complex |
| 72 | + * types and applies the requested normalization and shifting operations to HEALPix |
| 73 | + * pixel data on a per-ring basis. |
| 74 | + * |
| 75 | + * @tparam complex The complex type (cufftComplex or cufftDoubleComplex). |
| 76 | + * @param stream CUDA stream for kernel execution. |
| 77 | + * @param data Input/output array of HEALPix pixel data (in-place processing). |
| 78 | + * @param nside The HEALPix Nside parameter. |
| 79 | + * @param apply_shift Flag indicating whether to apply FFT shifting. |
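| 80 | + * @param norm Normalization type (0=by nphi, 1=by sqrt(nphi), 2=no normalization). NOTE(review): these integer codes do not correspond to the fft_norm enumerators declared in this namespace (FORWARD = 1, BACKWARD = 2, ORTHO = 3, NONE = 4) -- confirm which encoding the parameter actually uses. |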
| 81 | + * @return HRESULT indicating success or failure. |
| 82 | + */ |
23 | 83 | template <typename complex>
|
24 | 84 | HRESULT launch_shift_normalize_kernel(cudaStream_t stream,
|
25 | 85 | complex* data, // In-place data buffer
|
|
0 commit comments