Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ if(BUILD_METAL)
add_compile_options(-DMODMESH_METAL)
endif()

option(BUILD_CUDA "build with CUDA" OFF)
message(STATUS "BUILD_CUDA: ${BUILD_CUDA}")

option(USE_CLANG_TIDY "use clang-tidy" OFF)
option(LINT_AS_ERRORS "clang-tidy warnings as errors" OFF)

Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ DEBUG_SYMBOL ?= ON
MODMESH_PROFILE ?= OFF
BUILD_METAL ?= OFF
BUILD_QT ?= ON
BUILD_CUDA ?= OFF
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, let's make it off by default. Perhaps the variable BUILD_CUDA can be moved to be adjacent to BUILD_METAL like:

BUILD_CUDA ?= OFF
BUILD_METAL ?= OFF
BUILD_QT ?= ON

USE_CLANG_TIDY ?= OFF
CMAKE_BUILD_TYPE ?= Release
MAKE_PARALLEL ?= -j
Expand Down Expand Up @@ -89,6 +90,7 @@ CMAKE_CMD = cmake $(MODMESH_ROOT) \
-DDEBUG_SYMBOL=$(DEBUG_SYMBOL) \
-DBUILD_METAL=$(BUILD_METAL) \
-DBUILD_QT=$(BUILD_QT) \
-DBUILD_CUDA=$(BUILD_CUDA) \
-DUSE_CLANG_TIDY=$(USE_CLANG_TIDY) \
-DLINT_AS_ERRORS=ON \
-DMODMESH_PROFILE=$(MODMESH_PROFILE) \
Expand Down
17 changes: 17 additions & 0 deletions cpp/modmesh/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ if (BUILD_QT)
find_package(Qt6 REQUIRED COMPONENTS 3DExtras)
endif () # BUILD_QT

if (BUILD_CUDA)
find_package(CUDA REQUIRED)
find_package(CUDAToolkit REQUIRED)
enable_language(CUDA)
if (TARGET CUDA::cufft)
message(STATUS "CUDA cuFFT available")
else ()
message(FATAL_ERROR "CUDA cuFFT not found")
endif ()
endif () # BUILD_CUDA

add_subdirectory(buffer)
add_subdirectory(mesh)
add_subdirectory(toggle)
Expand Down Expand Up @@ -135,6 +146,12 @@ else () # BUILD_QT
)
endif () # BUILD_QT

if (BUILD_CUDA)
set_target_properties(modmesh_primary PROPERTIES LINKER_LANGUAGE CUDA)
target_compile_definitions(modmesh_primary PRIVATE BUILD_CUDA)
target_link_libraries(modmesh_primary PRIVATE CUDA::cudart CUDA::cufft)
endif () # BUILD_CUDA

set_target_properties(modmesh_primary PROPERTIES POSITION_INDEPENDENT_CODE ON)

if (CLANG_TIDY_EXE AND USE_CLANG_TIDY)
Expand Down
89 changes: 89 additions & 0 deletions cpp/modmesh/device/cuda/cuda_error_handle.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#pragma once

/*
* Copyright (c) 2025, Alex Chiang <[email protected]>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <cuda.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include <stdio.h>

#define CUDA_SAFE_CALL(err) __cudaSafeCall(err, __FILE__, __LINE__)
#define CUFFT_SAFE_CALL(err) __cufftSafeCall(err, __FILE__, __LINE__)
#define CUDA_GET_LAST_ERROR() __cudaCheckError(__FILE__, __LINE__)

void inline __cudaSafeCall(cudaError_t err, const char * file, const int line)
{
if (err != cudaSuccess)
{
printf("CUDA Error %d: %s.\n%s(%d)\n", (int)err, cudaGetErrorString(err), file, line);
}
}

void inline __cudaCheckError(const char * file, const int line)
{
cudaError_t err = cudaDeviceSynchronize();
if (err != cudaSuccess)
{
printf("CUDA Error %d: %s.\n%s(%d)\n", (int)err, cudaGetErrorString(err), file, line);
}

err = cudaGetLastError();
if (err != cudaSuccess)
{
printf("CUDA Error %d: %s.\n%s(%d)\n", (int)err, cudaGetErrorString(err), file, line);
}
}

const inline char * __cufftResultToString(cufftResult err)
{
switch (err)
{
case CUFFT_SUCCESS: return "CUFFT_SUCCESS.";
case CUFFT_INVALID_PLAN: return "CUFFT_INVALID_PLAN.";
case CUFFT_ALLOC_FAILED: return "CUFFT_ALLOC_FAILED.";
case CUFFT_INVALID_TYPE: return "CUFFT_INVALID_TYPE.";
case CUFFT_INVALID_VALUE: return "CUFFT_INVALID_VALUE.";
case CUFFT_INTERNAL_ERROR: return "CUFFT_INTERNAL_ERROR.";
case CUFFT_EXEC_FAILED: return "CUFFT_EXEC_FAILED.";
case CUFFT_SETUP_FAILED: return "CUFFT_SETUP_FAILED.";
case CUFFT_INVALID_SIZE: return "CUFFT_INVALID_SIZE.";
case CUFFT_UNALIGNED_DATA: return "CUFFT_UNALIGNED_DATA.";
default: return "CUFFT Unknown error code.";
}
}

void inline __cufftSafeCall(cufftResult err, const char * file, const int line)
{
if (CUFFT_SUCCESS != err)
{
printf("CUFFT error %d: %s\n%s(%d)\n", (int)err, __cufftResultToString(err), file, line);
}
}

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:
5 changes: 5 additions & 0 deletions cpp/modmesh/transform/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ set(MODMESH_TRANSFORM_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/fourier.hpp
CACHE FILEPATH "" FORCE)

if (BUILD_CUDA)
list(APPEND MODMESH_TRANSFORM_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/fourier.cuh)
endif()

set(MODMESH_TRANSFORM_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/fourier.cpp
CACHE FILEPATH "" FORCE)
Expand Down
85 changes: 85 additions & 0 deletions cpp/modmesh/transform/fourier.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#pragma once

/*
* Copyright (c) 2025, Alex Chiang <[email protected]>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include <modmesh/modmesh.hpp>
#include <modmesh/buffer/buffer.hpp>
#include <modmesh/device/cuda/cuda_error_handle.hpp>

#define FFT_CUDA_IMPL(CUFFT_DATA_TYPE, CUFFT_EXEC_TYPE) \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jysh1214 could you please evaluate if this can be made as a template function? If it can be done within a couple of hours, please update this macro to use a function template or merged it into the function template fft_cuda.

If you cannot make it quickly or do not know how to make it, please leave a TODO comment to state that a future contributor should evaluate how to turn the macro into a function template.

{ \
cufftHandle plan; \
CUFFT_DATA_TYPE * host_in = nullptr; \
CUFFT_DATA_TYPE * host_out = nullptr; \
CUFFT_DATA_TYPE * device_in = nullptr; \
CUFFT_DATA_TYPE * device_out = nullptr; \
host_in = (CUFFT_DATA_TYPE*)malloc(sizeof(CUFFT_DATA_TYPE) * N); \
host_out = (CUFFT_DATA_TYPE*)malloc(sizeof(CUFFT_DATA_TYPE) * N); \
for (size_t i = 0; i < N; ++i) \
{ \
host_in[i].x = in[i].real(); \
host_in[i].y = in[i].imag(); \
} \
CUDA_SAFE_CALL(cudaMalloc((void**)&device_in, sizeof(CUFFT_DATA_TYPE) * N)); \
CUDA_SAFE_CALL(cudaMalloc((void**)&device_out, sizeof(CUFFT_DATA_TYPE) * N)); \
CUDA_SAFE_CALL(cudaMemcpy(device_in, host_in, sizeof(CUFFT_DATA_TYPE) * N, cudaMemcpyHostToDevice)); \
CUFFT_SAFE_CALL(cufftPlan1d(&plan, N, CUFFT_##CUFFT_EXEC_TYPE, 1)); \
CUFFT_SAFE_CALL(cufftExec##CUFFT_EXEC_TYPE(plan, device_in, device_out, CUFFT_FORWARD)); \
CUDA_SAFE_CALL(cudaMemcpy(host_out, device_out, sizeof(CUFFT_DATA_TYPE) * N, cudaMemcpyDeviceToHost)); \
for (size_t i = 0; i < N; ++i) \
{ \
out[i] = T1<T2>{ host_out[i].x, host_out[i].y }; \
} \
CUFFT_SAFE_CALL(cufftDestroy(plan)); \
CUDA_SAFE_CALL(cudaFree(device_in)); \
CUDA_SAFE_CALL(cudaFree(device_out)); \
free(host_in); \
free(host_out); \
}

namespace modmesh
{

template <template <typename> class T1, typename T2>
void fft_cuda(SimpleArray<T1<T2>> const & in, SimpleArray<T1<T2>> & out)
{
size_t N = in.size();
if constexpr (std::is_same_v<T2, float>)
{
FFT_CUDA_IMPL(cufftComplex, C2C)
}
else if constexpr (std::is_same_v<T2, double>)
{
FFT_CUDA_IMPL(cufftDoubleComplex, Z2Z)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#undef FFT_CUDA_IMPL macro after use.

}
}

} /* namespace modmesh */

// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4:
33 changes: 26 additions & 7 deletions cpp/modmesh/transform/fourier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
#include <modmesh/math/math.hpp>
#include <modmesh/buffer/buffer.hpp>

#if defined(BUILD_CUDA)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to avoid any BUILD_CUDA macro from outside device/cuda. How much work it takes to make it?

#include <modmesh/transform/fourier.cuh>
#endif

namespace modmesh
{

Expand Down Expand Up @@ -63,22 +67,37 @@ class FourierTransform
FourierTransform & operator=(FourierTransform && other) = delete;

template <template <typename> class T1, typename T2>
static void fft(SimpleArray<T1<T2>> const & in, SimpleArray<T1<T2>> & out)
static void fft(SimpleArray<T1<T2>> const & in, SimpleArray<T1<T2>> & out, std::string const && backend)
{
const size_t N = in.size();

if ((N & (N - 1)) == 0)
if (backend == "cpu")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

String comparison can only be done during runtime, but the comparison should be available during compile-time.

{
if ((N & (N - 1)) == 0)
{
detail::fft_radix_2<T1, T2>(in, out);
}
else
{
detail::fft_bluestein<T1, T2>(in, out);
}
}
else if (backend == "cuda")
{
detail::fft_radix_2<T1, T2>(in, out);
#if defined(BUILD_CUDA)
modmesh::fft_cuda<T1, T2>(in, out);
#else
throw std::runtime_error("CUDA is not available.");
#endif
}
else
{
detail::fft_bluestein<T1, T2>(in, out);
throw std::runtime_error("unsupported backend.");
}
}

template <template <typename> class T1, typename T2>
static void ifft(SimpleArray<T1<T2>> const & in, SimpleArray<T1<T2>> & out)
static void ifft(SimpleArray<T1<T2>> const & in, SimpleArray<T1<T2>> & out, std::string const && backend)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use an enum for backend. Do not use a string which can only be checked during runtime.

{
size_t N = in.size();
SimpleArray<T1<T2>> in_conj{modmesh::small_vector<size_t>{N}, T1<T2>{0.0, 0.0}};
Expand All @@ -88,7 +107,7 @@ class FourierTransform
in_conj[i] = in[i].conj();
}

fft<T1, T2>(in_conj, out);
fft<T1, T2>(in_conj, out, std::move(backend));

for (size_t i = 0; i < N; ++i)
{
Expand Down Expand Up @@ -152,7 +171,7 @@ void fft_bluestein(SimpleArray<T1<T2>> const & in, SimpleArray<T1<T2>> & out)
A[i] *= B[i];
}

FourierTransform::ifft<T1, T2>(A, a);
FourierTransform::ifft<T1, T2>(A, a, "cpu");

for (size_t i = 0; i < N; ++i)
{
Expand Down
8 changes: 4 additions & 4 deletions cpp/modmesh/transform/pymod/wrap_fourier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ class MODMESH_PYTHON_WRAPPER_VISIBILITY WrapFourierTransform
namespace py = pybind11; // NOLINT(misc-unused-alias-decls)

(*this)
.def_static("fft", &wrapped_type::fft<modmesh::Complex, double>, py::arg("input"), py::arg("output"))
.def_static("fft", &wrapped_type::fft<modmesh::Complex, float>, py::arg("input"), py::arg("output"))
.def_static("ifft", &wrapped_type::ifft<modmesh::Complex, double>, py::arg("input"), py::arg("output"))
.def_static("ifft", &wrapped_type::ifft<modmesh::Complex, float>, py::arg("input"), py::arg("output"))
.def_static("fft", &wrapped_type::fft<modmesh::Complex, double>, py::arg("input"), py::arg("output"), py::arg("backend"))
.def_static("fft", &wrapped_type::fft<modmesh::Complex, float>, py::arg("input"), py::arg("output"), py::arg("backend"))
.def_static("ifft", &wrapped_type::ifft<modmesh::Complex, double>, py::arg("input"), py::arg("output"), py::arg("backend"))
.def_static("ifft", &wrapped_type::ifft<modmesh::Complex, float>, py::arg("input"), py::arg("output"), py::arg("backend"))
.def_static("dft", &wrapped_type::dft<modmesh::Complex, double>, py::arg("input"), py::arg("output"))
.def_static("dft", &wrapped_type::dft<modmesh::Complex, float>, py::arg("input"), py::arg("output"));
}
Expand Down
12 changes: 12 additions & 0 deletions gtests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,18 @@ target_link_libraries(
GTest::gmock_main
)

if (BUILD_CUDA)
find_package(CUDA REQUIRED)
find_package(CUDAToolkit REQUIRED)
enable_language(CUDA)
target_link_libraries(
test_nopython
CUDA::cudart
CUDA::cufft
)
target_compile_definitions(test_nopython PRIVATE BUILD_CUDA)
endif()

include(GoogleTest)
gtest_discover_tests(test_nopython)

Expand Down
Loading
Loading