@@ -17,28 +17,16 @@ project(TurboMind LANGUAGES CXX CUDA)
 
 find_package(CUDA 10.2 REQUIRED)
 
+find_package(CUDAToolkit REQUIRED)
+
 if(${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "11")
     add_definitions("-DENABLE_BF16")
2224 message ("CUDA_VERSION ${CUDA_VERSION_MAJOR} .${CUDA_VERSION_MINOR} is greater or equal than 11.0, enable -DENABLE_BF16 flag" )
 endif()
 
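With find_package(CUDAToolkit REQUIRED) now in place, the same gate could read the CUDAToolkit_VERSION variable that the CUDAToolkit module defines (CMake >= 3.17) instead of the legacy FindCUDA variables. A minimal sketch, not part of this change:

    # Sketch only: version gate via the CUDAToolkit package added above.
    if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.0")
        add_definitions("-DENABLE_BF16")
    endif()
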
-# if((${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "11" AND ${CUDA_VERSION_MINOR} VERSION_GREATER_EQUAL "8") OR (${CUDA_VERSION_MAJOR} VERSION_GREATER_EQUAL "12"))
-#     add_definitions("-DENABLE_FP8")
-#     option(ENABLE_FP8 "ENABLE_FP8" OFF)
-#     if(ENABLE_FP8)
-#         message("CUDA_VERSION ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} is greater or equal than 11.8, enable -DENABLE_FP8 flag")
-#     endif()
-# endif()
-
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
 
-option(BUILD_PYT "Build in PyTorch TorchScript class mode" OFF)
-if(NOT BUILD_MULTI_GPU)
-    option(BUILD_MULTI_GPU "Build project about multi-GPU" OFF)
-endif()
-if(NOT USE_TRITONSERVER_DATATYPE)
-    option(USE_TRITONSERVER_DATATYPE "Build triton backend for triton server" OFF)
-endif()
+option(BUILD_MULTI_GPU "Build multi-gpu support" ON)
 option(BUILD_PY_FFI "Build python ffi" ON)
 option(BUILD_TEST "Build tests" OFF)
 
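Note on the dropped if(NOT ...) guards: option() already leaves an existing cache entry untouched, so guarding it was redundant. A standalone illustration, not part of this diff:

    # Configuring with -DBUILD_MULTI_GPU=OFF pre-populates the cache,
    # and this option() call will not overwrite that value.
    option(BUILD_MULTI_GPU "Build multi-gpu support" ON)
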
@@ -89,43 +77,24 @@ if (LMDEPLOY_UBSAN_ENABLE)
 endif()
 
 if(BUILD_MULTI_GPU)
-    message(STATUS "Add DBUILD_MULTI_GPU, requires MPI and NCCL")
-    add_definitions("-DBUILD_MULTI_GPU")
-    set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
-    find_package(MPI REQUIRED)
-    find_package(NCCL REQUIRED)
-    set(CMAKE_MODULE_PATH "") # prevent bugs when building PyTorch
+    add_definitions("-DBUILD_MULTI_GPU=1")
+    set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
+    find_package(NCCL)
+    if(NCCL_FOUND)
+        set(USE_NCCL ON)
+        add_definitions("-DUSE_NCCL=1")
+    endif()
 endif()
 
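NCCL is now optional: find_package(NCCL) is unguarded and unversioned, so the build proceeds without it and USE_NCCL simply stays unset. Downstream consumption would look roughly like the sketch below, where NCCL_INCLUDE_DIR and NCCL_LIBRARIES are the variables a typical cmake/Modules/FindNCCL.cmake exports and turbomind is a hypothetical target name, neither confirmed by this diff:

    if(USE_NCCL)
        # Variable names assumed from the bundled FindNCCL module.
        target_include_directories(turbomind PRIVATE ${NCCL_INCLUDE_DIR})
        target_link_libraries(turbomind PRIVATE ${NCCL_LIBRARIES})
    endif()
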
-if(BUILD_PYT)
-    if(DEFINED ENV{NVIDIA_PYTORCH_VERSION})
-        if($ENV{NVIDIA_PYTORCH_VERSION} VERSION_LESS "20.03")
-            message(FATAL_ERROR "NVIDIA PyTorch image is too old for TorchScript mode.")
-        endif()
-        if($ENV{NVIDIA_PYTORCH_VERSION} VERSION_EQUAL "20.03")
-            add_definitions(-DLEGACY_THS=1)
-        endif()
-    endif()
-endif()
-
-if(USE_TRITONSERVER_DATATYPE)
-    message("-- USE_TRITONSERVER_DATATYPE")
-    add_definitions("-DUSE_TRITONSERVER_DATATYPE")
-endif()
 
 set(CXX_STD "17" CACHE STRING "C++ standard")
 # enable gold linker for binary and .so
 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold")
 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold")
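Forcing -fuse-ld=gold unconditionally breaks toolchains that ship without the gold linker. A guarded variant, sketched under the assumption of CMake >= 3.18 (where the CheckLinkerFlag module is available):

    include(CheckLinkerFlag)
    check_linker_flag(CXX "-fuse-ld=gold" HAVE_GOLD_LINKER)
    if(HAVE_GOLD_LINKER)
        add_link_options("-fuse-ld=gold")
    endif()
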
 set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
 
-set(TF_PATH "" CACHE STRING "TensorFlow path")
 set(CUSPARSELT_PATH "" CACHE STRING "cuSPARSELt path")
 
-if((BUILD_TF OR BUILD_TF2) AND NOT TF_PATH)
-    message(FATAL_ERROR "TF_PATH must be set if BUILD_TF or BUILD_TF2 (=TensorFlow mode) is on.")
-endif()
-
 list(APPEND CMAKE_MODULE_PATH ${CUDA_PATH}/lib64)
 
 # profiling
@@ -204,64 +173,8 @@ if (SPARSITY_SUPPORT)
     add_definitions(-DSPARSITY_ENABLED=1)
 endif()
 
-if(BUILD_TF)
-    list(APPEND COMMON_HEADER_DIRS ${TF_PATH}/include)
-    list(APPEND COMMON_LIB_DIRS ${TF_PATH})
-    add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
-endif()
-
-if(BUILD_TF2)
-    list(APPEND COMMON_HEADER_DIRS ${TF_PATH}/include)
-    list(APPEND COMMON_LIB_DIRS ${TF_PATH})
-    add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
-endif()
 
 set(PYTHON_PATH "python" CACHE STRING "Python path")
-if(BUILD_PYT)
-    execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import torch; print(torch.__version__,end='');"
-                    RESULT_VARIABLE _PYTHON_SUCCESS
-                    OUTPUT_VARIABLE TORCH_VERSION)
-    if(TORCH_VERSION VERSION_LESS "1.5.0")
-        message(FATAL_ERROR "PyTorch >= 1.5.0 is needed for TorchScript mode.")
-    endif()
-    execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import os; import torch;
-print(os.path.dirname(torch.__file__),end='');"
-                    RESULT_VARIABLE _PYTHON_SUCCESS
-                    OUTPUT_VARIABLE TORCH_DIR)
-    if(NOT _PYTHON_SUCCESS MATCHES 0)
-        message(FATAL_ERROR "Torch config Error.")
-    endif()
-    list(APPEND CMAKE_PREFIX_PATH ${TORCH_DIR})
-    find_package(Torch REQUIRED)
-    execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; from distutils import sysconfig;
-print(sysconfig.get_python_inc());"
-                    RESULT_VARIABLE _PYTHON_SUCCESS
-                    OUTPUT_VARIABLE PY_INCLUDE_DIR)
-    if(NOT _PYTHON_SUCCESS MATCHES 0)
-        message(FATAL_ERROR "Python config Error.")
-    endif()
-    list(APPEND COMMON_HEADER_DIRS ${PY_INCLUDE_DIR})
-    execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import torch;
-print(torch._C._GLIBCXX_USE_CXX11_ABI,end='');"
-                    RESULT_VARIABLE _PYTHON_SUCCESS
-                    OUTPUT_VARIABLE USE_CXX11_ABI)
-    message("-- USE_CXX11_ABI=${USE_CXX11_ABI}")
-    if(USE_CXX11_ABI)
-        set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO} -D_GLIBCXX_USE_CXX11_ABI=1")
-        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -D_GLIBCXX_USE_CXX11_ABI=1")
-        set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=1")
-        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=1")
-        set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=1")
-        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=1")
-    else()
-        set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO} -D_GLIBCXX_USE_CXX11_ABI=0")
-        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -D_GLIBCXX_USE_CXX11_ABI=0")
-        set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=0")
-        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_GLIBCXX_USE_CXX11_ABI=0")
-        set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=0")
-        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_USE_CXX11_ABI=0")
-    endif()
-endif()
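The twelve per-configuration set() calls removed above all encoded a single bit. Had the block survived, a compact equivalent could have used add_compile_definitions with a generator expression (sketch only; assumes USE_CXX11_ABI holds the True/False string probed from torch above):

    # $<BOOL:...> collapses torch's "True"/"False" answer to 1 or 0; the
    # definition applies to both CXX and CUDA sources in all configurations.
    add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=$<BOOL:${USE_CXX11_ABI}>)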
 
 # turn off warnings on windows
 if(MSVC)
@@ -286,14 +199,6 @@ if (MSVC)
     endforeach()
 endif()
 
-if(BUILD_MULTI_GPU)
-    list(APPEND COMMON_HEADER_DIRS ${MPI_INCLUDE_PATH})
-endif()
-
-if(USE_TRITONSERVER_DATATYPE)
-    list(APPEND COMMON_HEADER_DIRS ${PROJECT_SOURCE_DIR}/../repo-core-src/include)
-endif()
-
 include_directories(
     ${COMMON_HEADER_DIRS}
 )
@@ -314,111 +219,3 @@ endif()
 if(BUILD_PY_FFI)
     install(TARGETS _turbomind DESTINATION ${CMAKE_SOURCE_DIR}/lmdeploy/lib)
 endif()
-
-if(MSVC)
-    return()
-endif()
-
-## Measure the compile time
-option(MEASURE_BUILD_TIME "Measure the build time of each module" OFF)
-if(MEASURE_BUILD_TIME)
-    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_COMMAND} -E time")
-    set_property(GLOBAL PROPERTY RULE_LAUNCH_CUSTOM "${CMAKE_COMMAND} -E time")
-    set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time")
-endif()
-
-########################################
-
-add_library(transformer-shared SHARED
-    $<TARGET_OBJECTS:DynamicDecodeLayer>
-    $<TARGET_OBJECTS:Llama>
-    $<TARGET_OBJECTS:LlamaTritonBackend>
-    $<TARGET_OBJECTS:TransformerTritonBackend>
-    $<TARGET_OBJECTS:activation_kernels>
-    $<TARGET_OBJECTS:ban_bad_words>
-    $<TARGET_OBJECTS:cublasAlgoMap>
-    $<TARGET_OBJECTS:cublasMMWrapper>
-    $<TARGET_OBJECTS:cuda_utils>
-    $<TARGET_OBJECTS:custom_ar_comm>
-    $<TARGET_OBJECTS:custom_ar_kernels>
-    $<TARGET_OBJECTS:attention>
-    $<TARGET_OBJECTS:decoding_kernels>
-    $<TARGET_OBJECTS:gpt_kernels>
-    $<TARGET_OBJECTS:logprob_kernels>
-    $<TARGET_OBJECTS:logger>
-    $<TARGET_OBJECTS:memory_utils>
-    $<TARGET_OBJECTS:mpi_utils>
-    $<TARGET_OBJECTS:nccl_utils>
-    $<TARGET_OBJECTS:nvtx_utils>
-    $<TARGET_OBJECTS:anomaly_handler>
-    $<TARGET_OBJECTS:sampling_penalty_kernels>
-    $<TARGET_OBJECTS:sampling_topk_kernels>
-    $<TARGET_OBJECTS:sampling_topp_kernels>
-    $<TARGET_OBJECTS:stop_criteria>
-    $<TARGET_OBJECTS:tensor>
-    $<TARGET_OBJECTS:unfused_attention_kernels>
-)
-
-if(BUILD_MULTI_GPU)
-    target_link_libraries(transformer-shared PUBLIC
-        ${MPI_CXX_LIBRARIES}
-        ${NCCL_LIBRARIES}
-    )
-endif()
-
-if(USE_NVTX)
-    target_link_libraries(transformer-shared PUBLIC
-        -lnvToolsExt
-    )
-endif()
-
-set_target_properties(transformer-shared PROPERTIES POSITION_INDEPENDENT_CODE ON)
-set_target_properties(transformer-shared PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
-set_target_properties(transformer-shared PROPERTIES LINKER_LANGUAGE CXX)
-target_link_libraries(transformer-shared PUBLIC -lcudart -lcublas -lcublasLt -lcurand)
-
-include(GNUInstallDirs)
-set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TurboMind)
-
-include(CMakePackageConfigHelpers)
-configure_package_config_file(
-    ${CMAKE_CURRENT_LIST_DIR}/cmake/TurboMindConfig.cmake.in
-    ${CMAKE_CURRENT_BINARY_DIR}/TurboMindConfig.cmake
-    INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
-)
-
-install(
-    FILES
-        ${CMAKE_CURRENT_BINARY_DIR}/TurboMindConfig.cmake
-    DESTINATION ${INSTALL_CONFIGDIR}
-)
-
-install(
-    TARGETS
-        transformer-shared
-    EXPORT
-        transformer-shared-targets
-    LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
-    ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/turbomind
-    RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
-)
-
-install(
-    EXPORT
-        transformer-shared-targets
-    FILE
-        TurboMindTargets.cmake
-    DESTINATION
-        ${INSTALL_CONFIGDIR}
-)
-
-export(
-    EXPORT
-        transformer-shared-targets
-    FILE
-        ${CMAKE_CURRENT_BINARY_DIR}/TurboMindTargets.cmake
-    NAMESPACE
-        TritonCore::
-)
-
-export(PACKAGE TurboMind)
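
For reference, a downstream consumer of the removed package export would have looked roughly like this sketch (my_app and main.cc are hypothetical):

    find_package(TurboMind REQUIRED)
    add_executable(my_app main.cc)
    target_link_libraries(my_app PRIVATE TritonCore::transformer-shared)

Note that the export namespace (TritonCore::) differs from the package name (TurboMind).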