Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1453,7 +1453,12 @@ if (onnxruntime_USE_CUDA)
message(STATUS "CUDA Toolkit version is greater or equal than 12.8, enable -DENABLE_FP4 flag")
endif()

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all")
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all -compress-mode=size")
else()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all")
endif()

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --Werror default-stream-launch")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ extends:
msbuildPlatform: x64
packageName: x64-cuda
CudaVersion: ${{ parameters.CudaVersion }}
buildparameter: --use_cuda --cuda_home=${{ variables.win_cuda_home }} --enable_onnx_tests --use_webgpu --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52-real;61-real;75-real;86-real;89-real;90-virtual"
buildparameter: --use_cuda --cuda_home=${{ variables.win_cuda_home }} --enable_onnx_tests --use_webgpu --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52-real;61-real;75-real;86-real;89-real;120-real;120-virtual"
runTests: false
buildJava: false
java_artifact_id: onnxruntime_gpu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ stages:
msbuildPlatform: x64
packageName: x64-cuda
CudaVersion: ${{ parameters.CudaVersion }}
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75-real;86-real;89-real;90-virtual"
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75-real;86-real;89-real;120-real;120-virtual"
runTests: ${{ parameters.RunOnnxRuntimeTests }}
buildJava: ${{ parameters.buildJava }}
java_artifact_id: onnxruntime_gpu
Expand All @@ -80,7 +80,7 @@ stages:
msbuildPlatform: x64
CudaVersion: ${{ parameters.CudaVersion }}
packageName: x64-tensorrt
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75-real;86-real;89-real;90-virtual"
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75-real;86-real;89-real;120-real;120-virtual"
runTests: ${{ parameters.RunOnnxRuntimeTests }}
buildJava: ${{ parameters.buildJava }}
java_artifact_id: onnxruntime_gpu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ stages:
PYTHON_VERSION: ${{ python_version }}
EP_NAME: gpu
CudaVersion: ${{ parameters.cuda_version }}
EP_BUILD_FLAGS: --enable_lto --use_cuda --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52-real;61-real;75-real;86-real;89-real;90-virtual"
EP_BUILD_FLAGS: --enable_lto --use_cuda --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52-real;61-real;75-real;86-real;89-real;120-real;120-virtual"
use_tensorrt: True

- template: py-linux-gpu-stage.yml
Expand Down
2 changes: 1 addition & 1 deletion tools/ci_build/github/linux/build_cuda_c_api_package.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
set -e -x
docker run -e SYSTEM_COLLECTIONURI --rm --volume \
$BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}build \

Check warning on line 4 in tools/ci_build/github/linux/build_cuda_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_cuda_c_api_package.sh:4:115: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)

Check warning on line 4 in tools/ci_build/github/linux/build_cuda_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_cuda_c_api_package.sh:4:51: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)

Check warning on line 4 in tools/ci_build/github/linux/build_cuda_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_cuda_c_api_package.sh:4:1: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)
/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;90a-real;90-virtual' 'onnxruntime_USE_FPA_INTB_GEMM=OFF' && cd /build/Release && make install DESTDIR=/build/installed"
/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;90-real;120-real;120-virtual' 'onnxruntime_USE_FPA_INTB_GEMM=OFF' && cd /build/Release && make install DESTDIR=/build/installed"
2 changes: 1 addition & 1 deletion tools/ci_build/github/linux/build_linux_python_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ fi
if [ "$BUILD_DEVICE" == "GPU" ]; then
SHORT_CUDA_VERSION=$(echo $CUDA_VERSION | sed 's/\([[:digit:]]\+\.[[:digit:]]\+\)\.[[:digit:]]\+/\1/')
#Enable CUDA and TRT EPs.
BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--nvcc_threads=1" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;86-real;90a-real;90-virtual" "onnxruntime_USE_FPA_INTB_GEMM=OFF")
BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--nvcc_threads=1" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;86-real;90-real;120-real;120-virtual" "onnxruntime_USE_FPA_INTB_GEMM=OFF")
fi

if [ "$BUILD_DEVICE" == "NPU" ]; then
Expand Down
2 changes: 1 addition & 1 deletion tools/ci_build/github/linux/build_nodejs_package.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e -x
mkdir -p $HOME/.onnx

Check warning on line 3 in tools/ci_build/github/linux/build_nodejs_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_nodejs_package.sh:3:10: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)
docker run -e SYSTEM_COLLECTIONURI --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \

Check warning on line 4 in tools/ci_build/github/linux/build_nodejs_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_nodejs_package.sh:4:84: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)
--volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \
/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_nodejs --use_webgpu --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;90a-real;90-virtual' --use_vcpkg --use_vcpkg_ms_internal_asset_cache && cd /build/Release && make install DESTDIR=/build/installed"
/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_nodejs --use_webgpu --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;90-real;120-real;120-virtual' --use_vcpkg --use_vcpkg_ms_internal_asset_cache && cd /build/Release && make install DESTDIR=/build/installed"
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
set -e -x
mkdir -p $HOME/.onnx

Check warning on line 3 in tools/ci_build/github/linux/build_tensorrt_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_tensorrt_c_api_package.sh:3:10: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)
docker run -e SYSTEM_COLLECTIONURI --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \

Check warning on line 4 in tools/ci_build/github/linux/build_tensorrt_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_tensorrt_c_api_package.sh:4:134: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)

Check warning on line 4 in tools/ci_build/github/linux/build_tensorrt_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_tensorrt_c_api_package.sh:4:84: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)
--volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \

Check warning on line 5 in tools/ci_build/github/linux/build_tensorrt_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_tensorrt_c_api_package.sh:5:120: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)

Check warning on line 5 in tools/ci_build/github/linux/build_tensorrt_c_api_package.sh

View workflow job for this annotation

GitHub Actions / Optional Lint

[shellcheck] reported by reviewdog 🐶 Double quote to prevent globbing and word splitting. Raw Output: ./tools/ci_build/github/linux/build_tensorrt_c_api_package.sh:5:49: info: Double quote to prevent globbing and word splitting. (ShellCheck.SC2086)
/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;90a-real;90-virtual' 'onnxruntime_USE_FPA_INTB_GEMM=OFF' --use_vcpkg --use_vcpkg_ms_internal_asset_cache && cd /build/Release && make install DESTDIR=/build/installed"
/bin/bash -c "/usr/bin/python3 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60-real;70-real;75-real;80-real;90-real;120-real;120-virtual' 'onnxruntime_USE_FPA_INTB_GEMM=OFF' --use_vcpkg --use_vcpkg_ms_internal_asset_cache && cd /build/Release && make install DESTDIR=/build/installed"
Loading