From b3f653632e83d9a423320580db6f5ade3908bc18 Mon Sep 17 00:00:00 2001 From: wiryls <7984500+wiryls@users.noreply.github.com> Date: Mon, 8 Jul 2024 19:56:21 +0800 Subject: [PATCH 1/4] refactor: clean cmake and move torch version from cmake to file --- dipu/CMakeLists.txt | 73 ++++++------------- dipu/detect_env.py | 13 +--- dipu/torch_dipu/csrc_dipu/CMakeLists.txt | 5 +- dipu/torch_dipu/csrc_dipu/base/basedef.h | 2 + dipu/torch_dipu/csrc_dipu/base/dependency.h | 12 +++ .../csrc_dipu/profiler/collection.cpp | 2 + 6 files changed, 44 insertions(+), 63 deletions(-) create mode 100644 dipu/torch_dipu/csrc_dipu/base/dependency.h diff --git a/dipu/CMakeLists.txt b/dipu/CMakeLists.txt index e0658b855..ea3292c79 100755 --- a/dipu/CMakeLists.txt +++ b/dipu/CMakeLists.txt @@ -1,16 +1,6 @@ cmake_minimum_required(VERSION 3.20) project(TorchDIPU LANGUAGES CXX) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - -# the default CMAKE_BUILD_TYPE is Release -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE "Release") -endif() option(ENABLE_LTO "Enable Link Time Optimization in Release build" ON) if(ENABLE_LTO AND CMAKE_BUILD_TYPE STREQUAL "Release") @@ -29,13 +19,17 @@ else() message(STATUS "IPO / LTO disabled") endif() -option(TESTS "Whether to build unit tests" OFF) -option(LIBS "Whether to build dipu lib, default on" ON) - -# use gcover +option(DPIU_ENABLE_TEST "Whether to build unit tests" OFF) +option(DPIU_ENABLE_LIB "Whether to build dipu lib, default on" ON) option(ENABLE_COVERAGE "Use gcov" OFF) + message(STATUS ENABLE_COVERAGE=${ENABLE_COVERAGE}) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + + # device related set(DEVICE "camb" @@ -102,7 +96,7 @@ add_compile_definitions(${DIPU_VENDOR_NAME_FLAG_DEF}=1) message(STATUS "Vendor name flag definition: " ${DIPU_VENDOR_NAME_FLAG_DEF}) string(REGEX MATCH "[a-z0-9_]+" DIPU_GIT_HASH ${DIPU_GIT_HASH}) -execute_process(COMMAND sh -c "git status --porcelain | egrep '^(M| M)' | wc -l" +execute_process(COMMAND "git status --porcelain | egrep '^(M| M)' | wc -l" OUTPUT_VARIABLE DIPU_MODIFY_LEN) if(DIPU_MODIFY_LEN GREATER 0) set(DIPU_GIT_HASH ${DIPU_GIT_HASH}-dirty) @@ -113,15 +107,12 @@ add_compile_definitions(DIPU_GIT_HASH="${DIPU_GIT_HASH}") # Automatically generate a list of supported diopi functions execute_process( COMMAND - sh -c "grep -Po 'diopi[a-zA-Z0-9]+(?=\\()' ${CMAKE_CURRENT_SOURCE_DIR}/scripts/autogen_diopi_wrapper/diopi_functions.yaml | sort -uf > ${CMAKE_CURRENT_SOURCE_DIR}/SupportedDiopiFunctions.txt" ) # abi_v, torch dir, abi flag, cmake path execute_process( -COMMAND - sh -c - "python ${CMAKE_CURRENT_SOURCE_DIR}/detect_env.py" + COMMAND python detect_env.py OUTPUT_VARIABLE CHECK_TORCH_OUT OUTPUT_STRIP_TRAILING_WHITESPACE) string(REGEX REPLACE "\n" ";" CHECK_TORCH_OUT_LIST "${CHECK_TORCH_OUT}") @@ -157,16 +148,10 @@ endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=${DIPU_COMPILED_WITH_CXX11_ABI}") message(STATUS "DIPU_COMPILED_WITH_CXX11_ABI:" ${DIPU_COMPILED_WITH_CXX11_ABI}) -execute_process(COMMAND sh -c "dirname $(find $(dirname $(which python))/../ -name Python.h)" - OUTPUT_VARIABLE PYTHON_INCLUDE_DIR) -message(STATUS "PYTHON_INCLUDE_DIR: " ${PYTHON_INCLUDE_DIR}) - -# config +# Import BaseFuncions include(cmake/BaseFuncions.cmake) -_set_cpp_flags() - -# if add new version, please also update dipu/__init__.py torch_ver_XX -list(APPEND DIPU_SUPPORT_TORCHS "2.0.0" "2.1.0" "2.1.1") +# TODO: fix it. +# _set_cpp_flags() # use muxi torch. if device is muxi and torch has 'cuda'(muxi). if (${DEVICE} IN_LIST DEVICE_MUXI AND DIPU_TORCH_HAS_CUDA) @@ -175,31 +160,17 @@ if (${DEVICE} IN_LIST DEVICE_MUXI AND DIPU_TORCH_HAS_CUDA) add_compile_definitions(USE_MACA=1) endif() +# if add new version, please also update dipu/__init__.py torch_ver_XX +set(DIPU_SUPPORT_TORCHS "2.0.0;2.1.0;2.1.1") + find_package(Torch REQUIRED) -message(STATUS "Found Torch Version: ${Torch_VERSION}") -if(NOT ${Torch_VERSION} IN_LIST DIPU_SUPPORT_TORCHS) - message(FATAL_ERROR "this torch version is not supported by DIPU ${DIPU_SUPPORT_TORCHS}") +if(NOT Torch_VERSION IN_LIST DIPU_SUPPORT_TORCHS) + message(FATAL_ERROR "Torch ${Torch_VERSION} is not in DIPU supported version ${DIPU_SUPPORT_TORCHS}") +else() + message(STATUS "Found Torch: ${Torch_VERSION}") endif() -# message(STATUS "Torch TORCH_LIBRARY: ${TORCH_LIBRARY}") -include_directories(SYSTEM ${TORCH_INCLUDE_DIRS}) -message(STATUS "Torch TORCH_INCLUDE_DIRS: ${TORCH_INCLUDE_DIRS}") -message(STATUS "Torch TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}") - -string(REGEX REPLACE "^.*(..)\$" "\\1" Torch_VERSION_MINOR "0000${Torch_VERSION_MINOR}") -string(REGEX REPLACE "^.*(..)\$" "\\1" Torch_VERSION_PATCH "0000${Torch_VERSION_PATCH}") - -string(CONCAT Torch_VERSION ${Torch_VERSION_MAJOR}${Torch_VERSION_MINOR}${Torch_VERSION_PATCH}) -# expand to pattern XYYZZ by leading zero on minor and patch ver -message(STATUS "change to dipu Torch Version: ${Torch_VERSION}") - -# add_compile_definitions(DIPU_TORCH${Torch_VERSION}) -add_compile_definitions(DIPU_TORCH_VERSION=${Torch_VERSION}) - -link_directories(${PYTORCH_DIR}/torch/lib) -set(DIPU_TORCH_VERSION ${Torch_VERSION}) - # end torch find if(ENABLE_COVERAGE) @@ -209,10 +180,10 @@ endif() add_subdirectory(third_party) -if(LIBS) +if(DPIU_ENABLE_LIB) add_subdirectory(torch_dipu/csrc_dipu) endif() -if(TESTS) +if(DPIU_ENABLE_TEST) add_subdirectory(tests/cpp) endif() diff --git a/dipu/detect_env.py b/dipu/detect_env.py index 8d40ee49d..5243877cd 100644 --- a/dipu/detect_env.py +++ b/dipu/detect_env.py @@ -3,17 +3,10 @@ sys.path.append(os.getenv("PYTORCH_DIR", default="")) import torch -import builtins from pathlib import Path -print( - next( - item[-4:-2] - for item in dir(builtins) - if "__pybind11_internals_v4_gcc_libstdcpp_cxxabi10" in item - ) -) +print(torch._C._PYBIND11_BUILD_ABI[-2:]) print(Path(torch.__path__[0]).parent.absolute()) -print(1 if torch.compiled_with_cxx11_abi() else 0) +print(int(torch.compiled_with_cxx11_abi())) print(torch.utils.cmake_prefix_path) -print(1 if torch.cuda.is_available() else 0) +print(int(torch.cuda.is_available())) diff --git a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt index 6db1414d1..00442e7ea 100644 --- a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt +++ b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt @@ -149,6 +149,7 @@ target_include_directories(torch_dipu SYSTEM PUBLIC "${kineto_SOURCE_DIR}/include" PRIVATE "${kineto_SOURCE_DIR}/src") +# target_link_directories(torch_dipu PRIVATE "${PYTORCH_DIR}/torch/lib") target_link_libraries(torch_dipu PRIVATE Python3::Python torch) target_link_libraries(torch_dipu PRIVATE Threads::Threads) target_link_libraries(torch_dipu PRIVATE diopi) @@ -166,13 +167,13 @@ Python3_add_library(torch_dipu_python SHARED ${TORCH_DIPU_PYBIND_SOURCE}) # cause diopirt hidden, so temporarily use this target level setting. enhance in # future. set_target_properties(torch_dipu_python PROPERTIES CXX_VISIBILITY_PRESET hidden) -target_link_libraries(torch_dipu_python PUBLIC torch_dipu PRIVATE kineto) +target_link_libraries(torch_dipu_python PUBLIC torch_dipu PRIVATE torch kineto) target_include_directories(torch_dipu_python SYSTEM PRIVATE ..) #[[ Target: torch_dipu_cpython_extension ]] Python3_add_library(torch_dipu_cpython_extension SHARED stub.cpp) target_include_directories(torch_dipu_cpython_extension SYSTEM PRIVATE ..) -target_link_libraries(torch_dipu_cpython_extension PUBLIC torch_dipu_python PRIVATE torch_python) +target_link_libraries(torch_dipu_cpython_extension PUBLIC torch_dipu_python PRIVATE torch) target_compile_options(torch_dipu_cpython_extension PRIVATE -fstack-protector-all) set_target_properties(torch_dipu_cpython_extension PROPERTIES OUTPUT_NAME "_C.cpython-${Python3_VERSION_MAJOR}${Python3_VERSION_MINOR}-${CMAKE_SYSTEM_PROCESSOR}-linux-gnu" diff --git a/dipu/torch_dipu/csrc_dipu/base/basedef.h b/dipu/torch_dipu/csrc_dipu/base/basedef.h index 8f416e8ef..f3904f627 100644 --- a/dipu/torch_dipu/csrc_dipu/base/basedef.h +++ b/dipu/torch_dipu/csrc_dipu/base/basedef.h @@ -6,6 +6,8 @@ #include "csrc_dipu/runtime/device/basedef.h" +#include "dependency.h" + #define DIPU_DEVICE_TYPE_MACRO XPU #define DIPU_AUTOGRAD_DEVICE_TYPE_MACRO \ C10_CONCATENATE(Autograd, DIPU_DEVICE_TYPE_MACRO) diff --git a/dipu/torch_dipu/csrc_dipu/base/dependency.h b/dipu/torch_dipu/csrc_dipu/base/dependency.h new file mode 100644 index 000000000..883a5f7bc --- /dev/null +++ b/dipu/torch_dipu/csrc_dipu/base/dependency.h @@ -0,0 +1,12 @@ +// Copyright (c) 2023, DeepLink. +#pragma once + +#include + +#if TORCH_VERSION_MINOR < 100 && TORCH_VERSION_PATCH < 100 +#define DIPU_TORCH_VERSION \ + ((TORCH_VERSION_MAJOR * 100 + TORCH_VERSION_MINOR) * 100 + \ + TORCH_VERSION_PATCH) +#else +#error "require refactoring: version number exceeds limit" +#endif diff --git a/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp b/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp index 4ccaaa190..0f3059368 100644 --- a/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp +++ b/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp @@ -22,6 +22,8 @@ #include #include +#include "csrc_dipu/base/dependency.h" + #include "profiler_python.h" namespace dipu { From b282aa6daf5ecb2c9370168b6e2e5298dc6556a5 Mon Sep 17 00:00:00 2001 From: wiryls <7984500+wiryls@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:58:17 +0800 Subject: [PATCH 2/4] fix: manually set WORKING_DIRECTORY --- dipu/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dipu/CMakeLists.txt b/dipu/CMakeLists.txt index ea3292c79..9b073cbcb 100755 --- a/dipu/CMakeLists.txt +++ b/dipu/CMakeLists.txt @@ -106,13 +106,13 @@ add_compile_definitions(DIPU_GIT_HASH="${DIPU_GIT_HASH}") # Automatically generate a list of supported diopi functions execute_process( - COMMAND - "grep -Po 'diopi[a-zA-Z0-9]+(?=\\()' ${CMAKE_CURRENT_SOURCE_DIR}/scripts/autogen_diopi_wrapper/diopi_functions.yaml | sort -uf > ${CMAKE_CURRENT_SOURCE_DIR}/SupportedDiopiFunctions.txt" -) + COMMAND "grep -Po 'diopi[a-zA-Z0-9]+(?=\\()' ./scripts/autogen_diopi_wrapper/diopi_functions.yaml | sort -uf > ./SupportedDiopiFunctions.txt" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") # abi_v, torch dir, abi flag, cmake path execute_process( COMMAND python detect_env.py + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE CHECK_TORCH_OUT OUTPUT_STRIP_TRAILING_WHITESPACE) string(REGEX REPLACE "\n" ";" CHECK_TORCH_OUT_LIST "${CHECK_TORCH_OUT}") From adc409fc29577fd73f6a9704c5eb04d1f6b77366 Mon Sep 17 00:00:00 2001 From: wiryls <7984500+wiryls@users.noreply.github.com> Date: Tue, 9 Jul 2024 17:26:32 +0800 Subject: [PATCH 3/4] debug: CMakeLists.txt --- dipu/torch_dipu/csrc_dipu/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt index 00442e7ea..751c47724 100644 --- a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt +++ b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt @@ -149,7 +149,6 @@ target_include_directories(torch_dipu SYSTEM PUBLIC "${kineto_SOURCE_DIR}/include" PRIVATE "${kineto_SOURCE_DIR}/src") -# target_link_directories(torch_dipu PRIVATE "${PYTORCH_DIR}/torch/lib") target_link_libraries(torch_dipu PRIVATE Python3::Python torch) target_link_libraries(torch_dipu PRIVATE Threads::Threads) target_link_libraries(torch_dipu PRIVATE diopi) @@ -167,13 +166,14 @@ Python3_add_library(torch_dipu_python SHARED ${TORCH_DIPU_PYBIND_SOURCE}) # cause diopirt hidden, so temporarily use this target level setting. enhance in # future. set_target_properties(torch_dipu_python PROPERTIES CXX_VISIBILITY_PRESET hidden) -target_link_libraries(torch_dipu_python PUBLIC torch_dipu PRIVATE torch kineto) +target_link_libraries(torch_dipu_python PUBLIC torch_dipu torch kineto) target_include_directories(torch_dipu_python SYSTEM PRIVATE ..) #[[ Target: torch_dipu_cpython_extension ]] Python3_add_library(torch_dipu_cpython_extension SHARED stub.cpp) target_include_directories(torch_dipu_cpython_extension SYSTEM PRIVATE ..) -target_link_libraries(torch_dipu_cpython_extension PUBLIC torch_dipu_python PRIVATE torch) +target_link_directories(torch_dipu_cpython_extension PRIVATE "${PYTORCH_DIR}/torch/lib") +target_link_libraries(torch_dipu_cpython_extension PUBLIC torch_dipu_python PRIVATE torch_python) target_compile_options(torch_dipu_cpython_extension PRIVATE -fstack-protector-all) set_target_properties(torch_dipu_cpython_extension PROPERTIES OUTPUT_NAME "_C.cpython-${Python3_VERSION_MAJOR}${Python3_VERSION_MINOR}-${CMAKE_SYSTEM_PROCESSOR}-linux-gnu" From d12262974bc2a0044d6633e04ef4c6686b49deab Mon Sep 17 00:00:00 2001 From: wiryls <7984500+wiryls@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:44:21 +0800 Subject: [PATCH 4/4] fix: rollback version-detection --- dipu/CMakeLists.txt | 5 ++++- dipu/third_party/CMakeLists.txt | 3 ++- dipu/torch_dipu/csrc_dipu/CMakeLists.txt | 1 + dipu/torch_dipu/csrc_dipu/base/basedef.h | 2 -- dipu/torch_dipu/csrc_dipu/base/dependency.h | 12 ------------ dipu/torch_dipu/csrc_dipu/profiler/collection.cpp | 2 -- 6 files changed, 7 insertions(+), 18 deletions(-) delete mode 100644 dipu/torch_dipu/csrc_dipu/base/dependency.h diff --git a/dipu/CMakeLists.txt b/dipu/CMakeLists.txt index 9b073cbcb..c14eb7c2a 100755 --- a/dipu/CMakeLists.txt +++ b/dipu/CMakeLists.txt @@ -168,7 +168,10 @@ find_package(Torch REQUIRED) if(NOT Torch_VERSION IN_LIST DIPU_SUPPORT_TORCHS) message(FATAL_ERROR "Torch ${Torch_VERSION} is not in DIPU supported version ${DIPU_SUPPORT_TORCHS}") else() - message(STATUS "Found Torch: ${Torch_VERSION}") + string(REGEX REPLACE "^ERR[0-9]*([0-9][0-9])\$" "\\1" Torch_VERSION_MINOR "ERR00${Torch_VERSION_MINOR}") + string(REGEX REPLACE "^ERR[0-9]*([0-9][0-9])\$" "\\1" Torch_VERSION_PATCH "ERR00${Torch_VERSION_PATCH}") + set(DIPU_TORCH_VERSION "${Torch_VERSION_MAJOR}${Torch_VERSION_MINOR}${Torch_VERSION_PATCH}") + message(STATUS "DIPU_TORCH_VERSION: ${DIPU_TORCH_VERSION}") endif() # end torch find diff --git a/dipu/third_party/CMakeLists.txt b/dipu/third_party/CMakeLists.txt index e362029d2..7f6c2f666 100644 --- a/dipu/third_party/CMakeLists.txt +++ b/dipu/third_party/CMakeLists.txt @@ -61,6 +61,7 @@ message(STATUS "Using DIOPI_LIBRARY_PATH='${DIOPI_LIBRARY_PATH}', DIOPI_INCLUDE_ set(KINETO_BUILD_TESTS OFF CACHE INTERNAL "turn off tests") set(KINETO_USE_DEVICE_ACTIVITY ON CACHE INTERNAL "enable device activity") set(KINETO_COMPILED_WITH_CXX11_ABI "${DIPU_COMPILED_WITH_CXX11_ABI}" CACHE INTERNAL "pass through ABI settings") + # KINETO_COMPILED_WITH_CXX11_ABI might be removed from libkineto as we are -# using add_subdirectory instead of ExternalProject. add_subdirectory(kineto/libkineto SYSTEM) +target_compile_definitions(kineto PRIVATE DIPU_TORCH_VERSION=${DIPU_TORCH_VERSION}) diff --git a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt index 751c47724..072bbbbf0 100644 --- a/dipu/torch_dipu/csrc_dipu/CMakeLists.txt +++ b/dipu/torch_dipu/csrc_dipu/CMakeLists.txt @@ -129,6 +129,7 @@ set(TORCH_DIPU_SOURCE add_library(torch_dipu SHARED ${TORCH_DIPU_SOURCE} ${GENERATED_KERNELS} ${VENDOR_FILES} ${VENDOR_OUTPUT_HEADER}) target_include_directories(torch_dipu PUBLIC ..) +target_compile_definitions(torch_dipu PUBLIC DIPU_TORCH_VERSION=${DIPU_TORCH_VERSION}) # TODO(vendor) - replace those code with target_link_libraries. target_include_directories(torch_dipu SYSTEM PUBLIC ${VENDOR_INCLUDE_DIRS} ${VENDOR_DIST_DIR}) diff --git a/dipu/torch_dipu/csrc_dipu/base/basedef.h b/dipu/torch_dipu/csrc_dipu/base/basedef.h index f3904f627..8f416e8ef 100644 --- a/dipu/torch_dipu/csrc_dipu/base/basedef.h +++ b/dipu/torch_dipu/csrc_dipu/base/basedef.h @@ -6,8 +6,6 @@ #include "csrc_dipu/runtime/device/basedef.h" -#include "dependency.h" - #define DIPU_DEVICE_TYPE_MACRO XPU #define DIPU_AUTOGRAD_DEVICE_TYPE_MACRO \ C10_CONCATENATE(Autograd, DIPU_DEVICE_TYPE_MACRO) diff --git a/dipu/torch_dipu/csrc_dipu/base/dependency.h b/dipu/torch_dipu/csrc_dipu/base/dependency.h deleted file mode 100644 index 883a5f7bc..000000000 --- a/dipu/torch_dipu/csrc_dipu/base/dependency.h +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (c) 2023, DeepLink. -#pragma once - -#include - -#if TORCH_VERSION_MINOR < 100 && TORCH_VERSION_PATCH < 100 -#define DIPU_TORCH_VERSION \ - ((TORCH_VERSION_MAJOR * 100 + TORCH_VERSION_MINOR) * 100 + \ - TORCH_VERSION_PATCH) -#else -#error "require refactoring: version number exceeds limit" -#endif diff --git a/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp b/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp index 0f3059368..4ccaaa190 100644 --- a/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp +++ b/dipu/torch_dipu/csrc_dipu/profiler/collection.cpp @@ -22,8 +22,6 @@ #include #include -#include "csrc_dipu/base/dependency.h" - #include "profiler_python.h" namespace dipu {