From 7153cbc304fe78d94b5ae3ca9ac5b7eaa66e7c72 Mon Sep 17 00:00:00 2001 From: Tianlei WU Date: Fri, 17 Oct 2025 17:29:13 -0700 Subject: [PATCH 1/6] Initial version --- .github/workflows/linux_cuda_ci.yml | 6 +- .github/workflows/linux_tensorrt_ci.yml | 6 +- .../build-perf-test-binaries-pipeline.yml | 2 +- .../cuda-packaging-pipeline.yml | 9 +-- .../custom-nuget-packaging-pipeline.yml | 4 +- .../azure-pipelines/jar_package_testing.yml | 4 +- ...-gpu-tensorrt-cuda-minimal-ci-pipeline.yml | 10 ++-- .../nuget/templates/test_linux.yml | 4 +- .../azure-pipelines/post-merge-jobs.yml | 6 +- .../py-cuda-package-test-pipeline.yml | 2 +- .../jobs/py-linux-cuda-package-test-job.yml | 10 ++-- .../stages/nodejs-linux-packaging-stage.yml | 8 +-- .../nuget-linux-cuda-packaging-stage.yml | 20 +++---- .../stages/nuget-win-cuda-packaging-stage.yml | 2 +- .../stages/py-gpu-packaging-stage.yml | 2 +- .../stages/py-linux-gpu-stage.yml | 10 ++-- .../stages/py-win-gpu-stage.yml | 8 +-- .../templates/common-variables.yml | 11 ++-- .../jobs/download_win_gpu_library.yml | 37 +++++++----- .../templates/jobs/set-winenv.yml | 36 ++++++------ .../py-packaging-linux-test-cuda.yml | 4 +- .../azure-pipelines/templates/win-ci.yml | 6 +- .../win-gpu-doc-gen-ci-pipeline.yml | 2 +- ...-gpu-tensorrt-cuda-minimal-ci-pipeline.yml | 6 +- .../linux/docker/Dockerfile.manylinux2_28_cpu | 2 +- .../docker/Dockerfile.manylinux2_28_cuda | 4 +- .../docker/Dockerfile.manylinux2_28_rocm | 2 +- .../docker/Dockerfile.manylinux2_28_webgpu | 2 +- .../Dockerfile.package_ubi8_cuda_tensorrt10_0 | 2 +- ...rfile.package_ubi8_cuda_tensorrt10_0_torch | 57 ------------------- .../Dockerfile.ubuntu_cuda12_tensorrt10 | 2 +- .../docker/Dockerfile.ubuntu_tensorrt_bin | 6 +- .../inference/aarch64/default/cpu/Dockerfile | 2 +- .../inference/aarch64/python/cpu/Dockerfile | 2 +- .../inference/x86_64/default/cpu/Dockerfile | 2 +- .../x86_64/default/cuda12/Dockerfile | 2 +- .../inference/x86_64/python/cpu/Dockerfile | 2 +- 
.../inference/x86_64/python/cuda/Dockerfile | 4 +- .../x86_64/python/openvino/Dockerfile | 2 +- .../docker/scripts/install_python_deps.sh | 7 +-- .../github/windows/setup_env_cuda.bat | 37 +++++++++--- .../ci_build/github/windows/setup_env_gpu.bat | 21 ------- .../ci_build/github/windows/setup_env_trt.bat | 11 ---- 43 files changed, 162 insertions(+), 222 deletions(-) delete mode 100644 tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0_torch delete mode 100644 tools/ci_build/github/windows/setup_env_gpu.bat delete mode 100644 tools/ci_build/github/windows/setup_env_trt.bat diff --git a/.github/workflows/linux_cuda_ci.yml b/.github/workflows/linux_cuda_ci.yml index 61f994fcebd0e..ae15d03b81853 100644 --- a/.github/workflows/linux_cuda_ci.yml +++ b/.github/workflows/linux_cuda_ci.yml @@ -27,9 +27,9 @@ jobs: build_config: Release architecture: x64 dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda - docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1' + docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1' docker_image_repo: onnxruntimecuda12manylinuxbuild - extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' + extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' python_path_prefix: 
'PATH=/opt/python/cp310-cp310/bin:$PATH' run_tests: false # <<< Do not run tests in this job upload_build_output: true # <<< Upload the build/Release directory @@ -55,7 +55,7 @@ jobs: with: dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild - build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1' + build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1' push: true azure-container-registry-name: onnxruntimebuildcache env: diff --git a/.github/workflows/linux_tensorrt_ci.yml b/.github/workflows/linux_tensorrt_ci.yml index 9fb6625466c72..f30964320e653 100644 --- a/.github/workflows/linux_tensorrt_ci.yml +++ b/.github/workflows/linux_tensorrt_ci.yml @@ -27,9 +27,9 @@ jobs: build_config: Release architecture: x64 dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda - docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' + docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' docker_image_repo: onnxruntimetensorrt86gpubuild - extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' + extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 
--cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' run_tests: false # <<< Do not run tests in this job upload_build_output: true # <<< Upload the build/Release directory @@ -57,7 +57,7 @@ jobs: with: dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda image-name: ghcr.io/microsoft/onnxruntime/onnxruntimetensorrt86gpubuild - build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' + build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' push: true azure-container-registry-name: onnxruntimebuildcache env: diff --git a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml index e54216fe4ef4e..64f8146b25fe4 100644 --- a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml @@ -32,4 +32,4 @@ stages: extra_build_arg: '' cmake_build_type: Release cuda_version: 12.8 - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20250714.2 \ No newline at end of file + docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index 
d7fc0efbf45ea..8390295388c6d 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -51,19 +51,20 @@ parameters: default: '12.8' values: - 12.8 + - 13.0 variables: - template: templates/common-variables.yml - name: ReleaseVersionSuffix value: '' - name: win_trt_home - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda12 }} - name: win_cuda_home - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: $(Agent.TempDirectory)\v11.8 + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: $(Agent.TempDirectory)\v13.0 ${{ if eq(parameters.CudaVersion, '12.8') }}: value: $(Agent.TempDirectory)\v12.8 diff --git a/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml index eff2b4d885721..fe0f2427d31d9 100644 --- a/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/custom-nuget-packaging-pipeline.yml @@ -38,8 +38,8 @@ variables: - name: ReleaseVersionSuffix value: '' - name: win_cuda_home - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: $(Agent.TempDirectory)\v11.8 + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: $(Agent.TempDirectory)\v13.0 ${{ if eq(parameters.CudaVersion, '12.8') }}: value: $(Agent.TempDirectory)\v12.8 diff --git a/tools/ci_build/github/azure-pipelines/jar_package_testing.yml b/tools/ci_build/github/azure-pipelines/jar_package_testing.yml index 463c02203e21a..9d831df54096a 100644 --- a/tools/ci_build/github/azure-pipelines/jar_package_testing.yml +++ 
b/tools/ci_build/github/azure-pipelines/jar_package_testing.yml @@ -45,7 +45,7 @@ stages: DownloadTRT: true - template: templates/setup-maven.yml - + - task: Maven@4 displayName: 'Download Java Dependencies' inputs: @@ -105,7 +105,7 @@ stages: - name: runCodesignValidationInjection value: false - name: docker_base_image - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 timeoutInMinutes: 60 steps: - checkout: self diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index 5e6671e3797ce..829b44af32411 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -38,13 +38,13 @@ parameters: variables: - template: templates/common-variables.yml - name: docker_base_image - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20251008.2 + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251017.1 ${{ if eq(parameters.CudaVersion, '12.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 - name: linux_trt_version - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.linux_trt_version_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.linux_trt_version_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: 
value: ${{ variables.linux_trt_version_cuda12 }} diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml index b60ef7576184e..b26c96892952b 100644 --- a/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml +++ b/tools/ci_build/github/azure-pipelines/nuget/templates/test_linux.yml @@ -9,7 +9,7 @@ parameters: SpecificArtifact: false CustomOpArtifactName: 'onnxruntime-linux-x64' BuildId: '0' - CudaVersion: '11.8' + CudaVersion: '12.8' stages: - stage: NuGet_Test_Linux_${{ parameters.StageSuffix }}${{ parameters.MoreSuffix }} dependsOn: @@ -41,7 +41,7 @@ stages: - script: | mv $(Pipeline.Workspace)/build/drop-signed-nuget-${{ parameters.ArtifactSuffix }} $(Build.BinariesDirectory)/nuget-artifact mv $(Pipeline.Workspace)/build/${{ parameters.CustomOpArtifactName }} $(Build.BinariesDirectory)/testdata - + - template: get-nuget-package-version-as-variable.yml parameters: diff --git a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml index fdfafd4d9a179..81a7aad8d0088 100644 --- a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml +++ b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml @@ -9,8 +9,8 @@ parameters: variables: - template: templates/common-variables.yml - name: win_trt_folder - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.win_trt_folder_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.win_trt_folder_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.win_trt_folder_cuda12 }} @@ -242,7 +242,7 @@ stages: - template: templates/jobs/win-ci-vs-2022-job.yml parameters: BuildConfig: 'RelWithDebInfo' - EnvSetupScript: setup_env_trt.bat + EnvSetupScript: setup_env_cuda.bat buildArch: x64 additionalBuildFlags: --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir 
$(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder }}" --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 msbuildPlatform: x64 diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml index 02b6a6df76611..8beae99218867 100644 --- a/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml @@ -18,7 +18,7 @@ stages: machine_pool: 'Onnxruntime-Linux-GPU' python_wheel_suffix: '_gpu' timeout: 480 - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2 + docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 cuda_version: '12.8' - stage: Republish_Wheels diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml index b53aee639372d..dde00c7a36852 100644 --- a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml @@ -43,13 +43,13 @@ jobs: variables: - template: ../../templates/common-variables.yml - name: docker_base_image - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20251008.2 + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: 
onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251017.1 ${{ if eq(parameters.CudaVersion, '12.8') }}: - value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2 + value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 - name: linux_trt_version - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.linux_trt_version_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.linux_trt_version_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.linux_trt_version_cuda12 }} pool: ${{ parameters.machine_pool }} diff --git a/tools/ci_build/github/azure-pipelines/stages/nodejs-linux-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nodejs-linux-packaging-stage.yml index 8cbb81ba89c12..ff35d3e35ef6c 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nodejs-linux-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nodejs-linux-packaging-stage.yml @@ -18,15 +18,15 @@ stages: variables: - template: ../templates/common-variables.yml - name: CUDA_VERSION_MAJOR - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: '11' + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: '13' ${{ if eq(parameters.CudaVersion, '12.8') }}: value: '12' - name: CUDA_VERSION value: ${{ parameters.CudaVersion }} - name: linux_trt_version - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.linux_trt_version_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.linux_trt_version_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.linux_trt_version_cuda12 }} steps: diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml index 
b1e5f541b90e0..1ab7155d8abc9 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml @@ -20,8 +20,8 @@ stages: os: linux variables: - name: CUDA_VERSION_MAJOR - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: '11' + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: '13' ${{ if eq(parameters.CudaVersion, '12.8') }}: value: '12' - name: CUDA_VERSION @@ -72,15 +72,15 @@ stages: variables: - template: ../templates/common-variables.yml - name: CUDA_VERSION_MAJOR - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: '11' + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: '13' ${{ if eq(parameters.CudaVersion, '12.8') }}: value: '12' - name: CUDA_VERSION value: ${{ parameters.CudaVersion }} - name: linux_trt_version - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.linux_trt_version_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.linux_trt_version_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.linux_trt_version_cuda12 }} steps: @@ -138,13 +138,13 @@ stages: variables: - template: ../templates/common-variables.yml - name: CUDA_VERSION_MAJOR - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: '11' + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: '13' ${{ if eq(parameters.CudaVersion, '12.8') }}: value: '12' - name: linux_trt_version - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.linux_trt_version_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.linux_trt_version_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.linux_trt_version_cuda12 }} steps: diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index e7e541205ba0a..a948a3e6aff5a 
100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -13,7 +13,7 @@ parameters: - name: CudaVersion type: string - default: '11.8' + default: '13.0' - name: win_cuda_home type: string diff --git a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml index 3c5cf591039e0..755c6e0e88bd6 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml @@ -48,4 +48,4 @@ stages: extra_build_arg: ${{ parameters.build_py_parameters }} cmake_build_type: ${{ parameters.cmake_build_type }} cuda_version: ${{ parameters.cuda_version }} - docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2 + docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 diff --git a/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml index ab1fb919af413..f5b2c05931808 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-linux-gpu-stage.yml @@ -53,8 +53,8 @@ stages: value: '' - template: ../templates/common-variables.yml - name: trt_version - ${{ if eq(parameters.cuda_version, '11.8') }}: - value: ${{ variables.linux_trt_version_cuda11 }} + ${{ if eq(parameters.cuda_version, '13.0') }}: + value: ${{ variables.linux_trt_version_cuda13 }} ${{ if eq(parameters.cuda_version, '12.8') }}: value: ${{ variables.linux_trt_version_cuda12 }} steps: @@ -81,16 +81,14 @@ stages: - script: | set -e -x - mv $(Build.BinariesDirectory)/${{ parameters.cmake_build_type }} ./${{ 
parameters.cmake_build_type }} + mv $(Build.BinariesDirectory)/${{ parameters.cmake_build_type }} ./${{ parameters.cmake_build_type }} mv $(Build.BinariesDirectory)/dist ./dist pushd dist find . -name \*.whl -exec unzip -qq -o {} \; rm -r onnxruntime popd - pushd ${{ parameters.cmake_build_type }} + pushd ${{ parameters.cmake_build_type }} find . -name \*.whl -exec unzip -qq -o {} \; popd workingDirectory: '$(Build.ArtifactStagingDirectory)' displayName: 'Move files' - - diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml index c3957fc8341de..0c163f74768ca 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml @@ -74,8 +74,8 @@ stages: - name: CUDA_MODULE_LOADING value: 'LAZY' - name: win_trt_folder - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.win_trt_folder_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.win_trt_folder_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.win_trt_folder_cuda12 }} - name: trt_build_flag @@ -119,7 +119,7 @@ stages: --cmake_generator "$(VSGenerator)" --enable_pybind --enable_onnx_tests - --parallel 8 --use_vcpkg --use_vcpkg_ms_internal_asset_cache --use_binskim_compliant_compile_flags --update --build + --parallel 8 --use_vcpkg --use_vcpkg_ms_internal_asset_cache --use_binskim_compliant_compile_flags --update --build $(TelemetryOption) ${{ parameters.BUILD_PY_PARAMETERS }} ${{ parameters.EP_BUILD_FLAGS }} ${{ variables.trt_build_flag }} workingDirectory: '$(Build.BinariesDirectory)' @@ -213,7 +213,7 @@ stages: TMPDIR: "$(Agent.TempDirectory)" - powershell: | - $ErrorActionPreference = "Stop" + $ErrorActionPreference = "Stop" python -m pip uninstall -y onnxruntime onnxruntime-${{ parameters.EP_NAME }} -qq dir $(Build.ArtifactStagingDirectory) python -m pip 
--disable-pip-version-check install --no-index --find-links $(Build.ArtifactStagingDirectory) onnxruntime-${{ parameters.EP_NAME }} diff --git a/tools/ci_build/github/azure-pipelines/templates/common-variables.yml b/tools/ci_build/github/azure-pipelines/templates/common-variables.yml index 39a958e848784..a0ac2cfad1a93 100644 --- a/tools/ci_build/github/azure-pipelines/templates/common-variables.yml +++ b/tools/ci_build/github/azure-pipelines/templates/common-variables.yml @@ -1,7 +1,8 @@ variables: - common_trt_version: '10.9.0.34' + cuda12_trt_version: '10.9.0.34' + cuda13_trt_version: '10.13.3.9' # As for Debian installation, replace '-1.' by '-1+' when assigning trt version below - linux_trt_version_cuda11: ${{ variables.common_trt_version }}-1.cuda11.8 - linux_trt_version_cuda12: ${{ variables.common_trt_version }}-1.cuda12.8 - win_trt_folder_cuda11: TensorRT-${{ variables.common_trt_version }}.Windows10.x86_64.cuda-11.8 - win_trt_folder_cuda12: TensorRT-${{ variables.common_trt_version }}.Windows10.x86_64.cuda-12.8 \ No newline at end of file + linux_trt_version_cuda13: ${{ variables.cuda13_trt_version }}-1.cuda13.0 + linux_trt_version_cuda12: ${{ variables.cuda12_trt_version }}-1.cuda12.8 + win_trt_folder_cuda13: TensorRT-${{ variables.cuda13_trt_version }}.Windows.win10.cuda-13.0 + win_trt_folder_cuda12: TensorRT-${{ variables.cuda12_trt_version }}.Windows10.x86_64.cuda-12.8 diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_gpu_library.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_gpu_library.yml index be213337091e8..631f40bcdd22a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_gpu_library.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/download_win_gpu_library.yml @@ -9,14 +9,14 @@ parameters: type: string default: '12.8' values: - - 11.8 + - 13.0 - 12.8 - name: TrtVersion type: string default: '10.9.0.34' values: - - 8.6.1.6 - 10.9.0.34 + - 10.13.3.9 
steps: - ${{ if eq(parameters.DownloadCUDA, true) }}: @@ -42,37 +42,48 @@ steps: displayName: 'Print PATH after download CUDA SDK' - ${{ if eq(parameters.DownloadTRT, true) }}: - - ${{ if eq(parameters.CudaVersion, '11.8') }}: + - ${{ if eq(parameters.CudaVersion, '13.0') }}: - powershell: | - Write-Host "##vso[task.setvariable variable=trtCudaVersion;]11.8" + Write-Host "##vso[task.setvariable variable=trtCudaVersion;]13.0" displayName: Set trtCudaVersion - - ${{ if and(eq(parameters.CudaVersion, '12.8'), eq(parameters.TrtVersion, '8.6.1.6')) }}: + - ${{ if and(eq(parameters.CudaVersion, '12.8'), eq(parameters.TrtVersion, '10.13.3.9')) }}: - powershell: | - Write-Host "##vso[task.setvariable variable=trtCudaVersion;]12.0" + Write-Host "##vso[task.setvariable variable=trtCudaVersion;]12.9" displayName: Set trtCudaVersion - ${{ if and(eq(parameters.CudaVersion, '12.8'), eq(parameters.TrtVersion, '10.9.0.34')) }}: - powershell: | Write-Host "##vso[task.setvariable variable=trtCudaVersion;]12.8" displayName: Set trtCudaVersion - - script: | - echo $(trtCudaVersion) && echo TensorRT-${{ parameters.TrtVersion }}.Windows10.x86_64.cuda-$(trtCudaVersion) - displayName: Get trtCudaVersion and Directory Name + - ${{ if eq(parameters.TrtVersion, '10.9.0.34') }}: + - powershell: | + Write-Host "##vso[task.setvariable variable=trtPlatformString;]Windows10.x86_64" + displayName: 'Set TRT platform string for 10.9.0.34' + - ${{ if eq(parameters.TrtVersion, '10.13.3.9') }}: + - powershell: | + Write-Host "##vso[task.setvariable variable=trtPlatformString;]Windows.win10" + displayName: 'Set TRT platform string for 10.13.3.9' + + - powershell: | + $trtDirName = "TensorRT-${{ parameters.TrtVersion }}.$(trtPlatformString).cuda-$(trtCudaVersion)" + Write-Host "TensorRT Directory Name: $trtDirName" + Write-Host "##vso[task.setvariable variable=trtDirName;]$trtDirName" + displayName: 'Construct TensorRT Directory Name' - task: AzureCLI@2 - displayName: 'Download TensorRT-${{ 
parameters.TrtVersion }}.Windows10.x86_64.cuda-$(trtCudaVersion)' + displayName: 'Download $(trtDirName)' inputs: azureSubscription: AIInfraBuildOnnxRuntimeOSS scriptType: 'batch' scriptLocation: 'inlineScript' inlineScript: | set AZCOPY_AUTO_LOGIN_TYPE=AZCLI - azcopy.exe cp --recursive https://lotusscus.blob.core.windows.net/models/local/TensorRT-${{ parameters.TrtVersion }}.Windows10.x86_64.cuda-$(trtCudaVersion) $(Agent.TempDirectory) + azcopy.exe cp --recursive https://lotusscus.blob.core.windows.net/models/local/$(trtDirName) $(Agent.TempDirectory) - powershell: | - Write-Host "##vso[task.prependpath]$(Agent.TempDirectory)\TensorRT-${{ parameters.TrtVersion }}.Windows10.x86_64.cuda-$(trtCudaVersion)\lib" - displayName: 'Append TensorRT-${{ parameters.TrtVersion }} Directory to PATH' + Write-Host "##vso[task.prependpath]$(Agent.TempDirectory)\$(trtDirName)\lib" + displayName: 'Append $(trtDirName) Directory to PATH' - task: CmdLine@2 inputs: diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml index d7c940cda30f4..307cec714a48b 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml @@ -7,15 +7,15 @@ parameters: - name: DownloadTRT type: boolean default: false - - name: PrimaryCUDAVersion + - name: CUDAVersion type: string default: '12.8' -# - name: SecondaryCUDAVersion -# type: string -# default: '11.8' -# - name: win_trt_folder_cuda11 -# type: string -# default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-11.8' + values: + - 13.0 + - 12.8 + - name: win_trt_folder_cuda13 + type: string + default: 'TensorRT-10.13.3.9.Windows.win10.cuda-13.0' - name: win_trt_folder_cuda12 type: string default: 'TensorRT-10.9.0.34.Windows10.x86_64.cuda-12.8' @@ -23,18 +23,18 @@ parameters: steps: - ${{ if eq(parameters.DownloadCUDA, 'true') }}: - powershell: | - azcopy.exe cp --recursive 
"https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.PrimaryCUDAVersion }}" $(Agent.TempDirectory) - displayName: 'Download Primary CUDA SDK v${{ parameters.PrimaryCUDAVersion }}' -# - powershell: | -# azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.SecondaryCUDAVersion }}" $(Agent.TempDirectory) -# displayName: 'Download Secondary CUDA SDK v${{ parameters.SecondaryCUDAVersion }}' + azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.CUDAVersion }}" $(Agent.TempDirectory) + displayName: 'Download Primary CUDA SDK v${{ parameters.CUDAVersion }}' + - ${{ if eq(parameters.DownloadTRT, 'true') }}: - - powershell: | - azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda12 }}" $(Agent.TempDirectory) - displayName: 'Download ${{ parameters.win_trt_folder_cuda12 }}' -# - powershell: | -# azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda11 }}" $(Agent.TempDirectory) -# displayName: 'Download ${{ parameters.win_trt_folder_cuda11 }}' + - ${{ if eq(parameters.CUDAVersion, '12.8') }}: + - powershell: | + azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda12 }}" $(Agent.TempDirectory) + displayName: 'Download ${{ parameters.win_trt_folder_cuda12 }}' + - ${{ if eq(parameters.CUDAVersion, '13.0') }}: + - powershell: | + azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda13 }}" $(Agent.TempDirectory) + displayName: 'Download ${{ parameters.win_trt_folder_cuda13 }}' - task: BatchScript@1 displayName: 'setup env' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml index 
263f73a9e29b0..3244008eb281a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml @@ -44,8 +44,8 @@ jobs: - name: skipComponentGovernanceDetection value: true - name: trt_version - ${{ if eq(parameters.cuda_version, '11.8') }}: - value: ${{ variables.linux_trt_version_cuda11 }} + ${{ if eq(parameters.cuda_version, '13.0') }}: + value: ${{ variables.linux_trt_version_cuda13 }} ${{ if eq(parameters.cuda_version, '12.8') }}: value: ${{ variables.linux_trt_version_cuda12 }} workspace: diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index ca698123a04e7..2d4d05755eb24 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -75,9 +75,9 @@ parameters: - name: CudaVersion type: string - default: '11.8' + default: '12.8' values: - - 11.8 + - 13.0 - 12.8 - name: SpecificArtifact @@ -123,7 +123,7 @@ stages: - output: pipelineArtifact targetPath: $(Build.ArtifactStagingDirectory) artifactName: 'onnxruntime${{ parameters.artifact_name_suffix }}-win-${{ parameters.packageName }}' - + - ${{ if eq(parameters.buildJava, 'true') }}: - output: pipelineArtifact targetPath: $(Build.BinariesDirectory)\onnxruntime-java-win-${{ parameters.msbuildPlatform }} diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml index 8b320b0ceb4ac..3724e2f6a2493 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml @@ -34,7 +34,7 @@ parameters: type: string default: '12.8' values: - - 11.8 + - 13.0 - 12.8 stages: diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml 
b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index 08953749f6527..893de1598695c 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -38,8 +38,8 @@ parameters: variables: - template: templates/common-variables.yml - name: win_trt_folder - ${{ if eq(parameters.CudaVersion, '11.8') }}: - value: ${{ variables.win_trt_folder_cuda11 }} + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: ${{ variables.win_trt_folder_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.win_trt_folder_cuda12 }} @@ -48,7 +48,7 @@ jobs: pool: 'onnxruntime-Win2022-GPU-A10' variables: MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' - EnvSetupScript: setup_env_trt.bat + EnvSetupScript: setup_env_cuda.bat skipComponentGovernanceDetection: true TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] timeoutInMinutes: 150 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu index a277286866e41..56c10d87366fb 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cpu @@ -1,4 +1,4 @@ -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1 FROM $BASEIMAGE ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-17 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda index 489e4ce9f3913..6c70549bd3c2b 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda +++ 
b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda @@ -1,8 +1,8 @@ -# The default ARGs are for cuda 11.8 with cudnn8, TensorRT is optional +# The default ARGs are for cuda 12.8 with cudnn9, TensorRT is optional # Please overwrite BASEIMAGE, TRT_VERSION and other arguments with # --docker-build-args ' --build-arg BASEIMAGE=other_base_image --build-arg TRT_VERSION=other_trt_version etc...' # for other cuda version and TRT version -ARG BASEIMAGE=nvidia/cuda:12.5.1-cudnn-devel-ubi8 +ARG BASEIMAGE=nvidia/cuda:12.8.1-cudnn-devel-ubi8 FROM $BASEIMAGE ARG TRT_VERSION=10.9.0.34-1.cuda12.8 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm index 5410bd64036ce..ac72d043eb182 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm @@ -1,4 +1,4 @@ -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1 FROM $BASEIMAGE ARG ROCM_VERSION=6.2.3 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_webgpu b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_webgpu index 07ad8e933baf0..dc6a355f32754 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_webgpu +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_webgpu @@ -1,4 +1,4 @@ -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1 FROM $BASEIMAGE ENV JAVA_HOME=/usr/lib/jvm/msopenjdk-17 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0
b/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0 index 1933fd371d3bc..83b1e97096fee 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0 +++ b/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0 @@ -5,7 +5,7 @@ # Dockerfile to Test ONNX Runtime on UBI8 with TensorRT 10 and CUDA 12 by default # Build base image with required system packages -ARG BASEIMAGE=nvidia/cuda:12.5.1-cudnn-devel-ubi8 +ARG BASEIMAGE=nvidia/cuda:12.8.1-cudnn-devel-ubi8 ARG TRT_VERSION=10.9.0.34-1.cuda12.8 FROM $BASEIMAGE AS base ARG TRT_VERSION diff --git a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0_torch b/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0_torch deleted file mode 100644 index 62562705c92b2..0000000000000 --- a/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0_torch +++ /dev/null @@ -1,57 +0,0 @@ -# -------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------- -# Dockerfile to Test ONNX Runtime on UBI8 with TensorRT 10.0 and CUDA 11.8 by default - -# Build base image with required system packages -ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8 -ARG TRT_VERSION=10.9.0.34-1.cuda11.8 -FROM $BASEIMAGE AS base -ARG TRT_VERSION -ENV PATH=/opt/python/cp310-cp310/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH} - -RUN dnf install -y bash wget &&\ - dnf clean dbcache - -RUN pip3 install --upgrade pip -RUN pip3 install setuptools>=68.2.2 - -#Install TensorRT only if TRT_VERSION is not empty -RUN if [ -n "$TRT_VERSION" ]; then \ - echo "TRT_VERSION is $TRT_VERSION" && \ - dnf -y install \ - libnvinfer10-${TRT_VERSION} \ - libnvinfer-headers-devel-${TRT_VERSION} \ - libnvinfer-devel-${TRT_VERSION} \ - libnvinfer-lean10-${TRT_VERSION} \ - libnvonnxparsers10-${TRT_VERSION} \ - libnvonnxparsers-devel-${TRT_VERSION} \ - libnvinfer-dispatch10-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-vc-plugin10-${TRT_VERSION} \ - libnvinfer-bin-${TRT_VERSION} \ - libnvinfer-plugin10-${TRT_VERSION} \ - libnvinfer-plugin-devel-${TRT_VERSION} \ - libnvinfer-vc-plugin-devel-${TRT_VERSION} \ - libnvinfer-lean-devel-${TRT_VERSION} \ - libnvinfer-dispatch-devel-${TRT_VERSION} \ - libnvinfer-headers-plugin-devel-${TRT_VERSION} && \ - dnf clean dbcache ; \ -else \ - echo "TRT_VERSION is none skipping Tensor RT Installation" ; \ -fi - -ADD scripts /tmp/scripts -RUN cd /tmp/scripts && /tmp/scripts/install_dotnet.sh && /tmp/scripts/install_java.sh && rm -rf /tmp/scripts - -RUN python3 -m pip uninstall -y torch -RUN python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 - -# Build final image from base. 
-FROM base as final -ARG BUILD_USER=onnxruntimedev -ARG BUILD_UID=1000 -RUN adduser --uid $BUILD_UID $BUILD_USER -WORKDIR /home/$BUILD_USER -USER $BUILD_USER diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda12_tensorrt10 b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda12_tensorrt10 index 1d3575411a692..80432f2dcea66 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda12_tensorrt10 +++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_cuda12_tensorrt10 @@ -5,7 +5,7 @@ # Dockerfile to run ONNXRuntime with TensorRT integration # Build base image with required system packages -FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04 AS base +FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 AS base # The local directory into which to build and install CMAKE ARG ONNXRUNTIME_LOCAL_CODE_DIR=/code diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin index 03f14732b70f8..511955cd767c5 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin +++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt_bin @@ -5,7 +5,7 @@ # Dockerfile to run ONNXRuntime with TensorRT installed from provided binaries # Build base image with required system packages -FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu22.04 AS base +FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 AS base # The local directory into which to build and install CMAKE ARG ONNXRUNTIME_LOCAL_CODE_DIR=/code @@ -27,7 +27,7 @@ RUN apt-get install -y --no-install-recommends \ ln -s /usr/bin/python3 python &&\ ln -s /usr/bin/pip3 pip; -RUN pip install --upgrade pip +RUN pip install --upgrade pip RUN pip install setuptools>=68.2.2 # Install TensorRT @@ -100,4 +100,4 @@ RUN /bin/sh build.sh ${PARSER_CONFIG} --parallel --build_shared_lib --cuda_home USER root # Intall ORT wheel -RUN pip install ${ONNXRUNTIME_LOCAL_CODE_DIR}/onnxruntime/build/Linux/Release/dist/*.whl \ No 
newline at end of file +RUN pip install ${ONNXRUNTIME_LOCAL_CODE_DIR}/onnxruntime/build/Linux/Release/dist/*.whl diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile index cef2d11780969..e2ddd04ee4642 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile @@ -2,7 +2,7 @@ # Licensed under the MIT License. # This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_almalinux8_gcc14_dotnet:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_almalinux8_gcc14_dotnet:20251017.1 FROM $BASEIMAGE ENV LANG=en_US.UTF-8 diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile index 79d99d08dcc4e..d39e39a5a429d 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/Dockerfile @@ -1,4 +1,4 @@ -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_almalinux8_gcc14:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_almalinux8_gcc14:20251017.1 FROM $BASEIMAGE ADD scripts /tmp/scripts diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile index 72d98206f9205..d2d5ae684e10e 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile @@ -2,7 +2,7 @@ # Licensed 
under the MIT License. # This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14_dotnet:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14_dotnet:20251017.1 FROM $BASEIMAGE ENV LANG=en_US.UTF-8 diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile index 85f4a074e30bf..344b60af68cd4 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile @@ -2,7 +2,7 @@ # Licensed under the MIT License. # This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12_dotnet:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14_dotnet:20251017.1 FROM $BASEIMAGE ARG TRT_VERSION diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile index 81ba47f397f91..8288a98ed2adc 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/Dockerfile @@ -1,4 +1,4 @@ -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251008.2 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1 FROM $BASEIMAGE ADD scripts /tmp/scripts diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile 
b/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile index d87870db0bca8..c65febda1b33a 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/cuda/Dockerfile @@ -1,8 +1,8 @@ -# The default ARGs are for cuda 11.8 with cudnn8, TensorRT is optional +# The default ARGs are for cuda 12.8 with cudnn9, TensorRT is optional # Please overwrite BASEIMAGE, TRT_VERSION and other arguments with # --docker-build-args ' --build-arg BASEIMAGE=other_base_image --build-arg TRT_VERSION=other_trt_version etc...' # for other cuda version and TRT version -ARG BASEIMAGE=nvidia/cuda:12.5.1-cudnn-devel-ubi8 +ARG BASEIMAGE=nvidia/cuda:12.8.1-cudnn-devel-ubi8 FROM $BASEIMAGE ARG TRT_VERSION=10.9.0.34-1.cuda12.8 diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile index 5ad1023bfb5b2..44b14d31919b2 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/python/openvino/Dockerfile @@ -1,5 +1,5 @@ # Use the specified UBI8 base image with GCC 14 -ARG BASEIMAGE="onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251008.2" +ARG BASEIMAGE="onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_almalinux8_gcc14:20251017.1" FROM ${BASEIMAGE} ARG BUILD_UID=1000 diff --git a/tools/ci_build/github/linux/docker/scripts/install_python_deps.sh b/tools/ci_build/github/linux/docker/scripts/install_python_deps.sh index b3acc4da57a4c..b0a7f4baaaff0 100755 --- a/tools/ci_build/github/linux/docker/scripts/install_python_deps.sh +++ b/tools/ci_build/github/linux/docker/scripts/install_python_deps.sh @@ -3,21 +3,16 @@ set -e -x INSTALL_DEPS_TRAINING=false INSTALL_DEPS_DISTRIBUTED_SETUP=false -TARGET_ROCM=false 
-CU_VER="11.8" -TORCH_VERSION='2.0.0' USE_CONDA=false -while getopts p:h:d:v:tmurc parameter_Option +while getopts p:d:v:tmuc parameter_Option do case "${parameter_Option}" in p) PYTHON_VER=${OPTARG};; -h) TORCH_VERSION=${OPTARG};; d) DEVICE_TYPE=${OPTARG};; v) CU_VER=${OPTARG};; t) INSTALL_DEPS_TRAINING=true;; m) INSTALL_DEPS_DISTRIBUTED_SETUP=true;; -r) TARGET_ROCM=true;; c) USE_CONDA=true;; esac done diff --git a/tools/ci_build/github/windows/setup_env_cuda.bat b/tools/ci_build/github/windows/setup_env_cuda.bat index f095f58f9920e..00ded251b3ed9 100644 --- a/tools/ci_build/github/windows/setup_env_cuda.bat +++ b/tools/ci_build/github/windows/setup_env_cuda.bat @@ -1,17 +1,40 @@ REM Copyright (c) Microsoft Corporation. All rights reserved. REM Licensed under the MIT License. -if exist PATH=%AGENT_TEMPDIRECTORY%\v12.8\ ( - set PATH=%AGENT_TEMPDIRECTORY%\v12.8\bin;%AGENT_TEMPDIRECTORY%\v12.8\extras\CUPTI\lib64;%PATH% +REM --- Setup CUDA 12.8 --- +REM Check if a local/agent-specific version exists +if exist "%AGENT_TEMPDIRECTORY%\v12.8\" ( + echo Using CUDA 12.8 from AGENT_TEMPDIRECTORY. + set "PATH=%AGENT_TEMPDIRECTORY%\v12.8\bin;%AGENT_TEMPDIRECTORY%\v12.8\extras\CUPTI\lib64;%PATH%" ) else ( - set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\extras\CUPTI\lib64;%PATH% + echo Using system default CUDA 12.8. 
+ set "PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\extras\CUPTI\lib64;%PATH%" ) -@REM The default version is still cuda v12.8, because set cuda v11.8 after it -if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ ( - set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64 +REM --- Setup TensorRT for CUDA 12.8 --- +set "TRT_12_8_PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.9.0.34.Windows10.x86_64.cuda-12.8\lib" +if exist "%TRT_12_8_PATH%\" ( + echo Adding TensorRT 10.9.0 (for CUDA 12.8) to PATH. + set "PATH=%TRT_12_8_PATH%;%PATH%" ) else ( - set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64 + echo Warning: TensorRT 10.9.0 directory not found at %TRT_12_8_PATH% ) + +REM --- [DISABLED] Setup for CUDA 13.0 --- +@REM If uncommented, CUDA 13.0 would become the default, overriding 12.8. +@REM set "TRT_13_0_PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.13.3.9.Windows.win10.cuda-13.0\lib" +@REM if exist "%TRT_13_0_PATH%\" ( +@REM set "PATH=%TRT_13_0_PATH%;%PATH%" +@REM ) +@REM +@REM if exist "%AGENT_TEMPDIRECTORY%\v13.0\" ( +@REM set "PATH=%AGENT_TEMPDIRECTORY%\v13.0\bin;%AGENT_TEMPDIRECTORY%\v13.0\extras\CUPTI\lib64;%PATH%" +@REM ) else ( +@REM set "PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\extras\CUPTI\lib64;%PATH%" +@REM ) +@REM --- [END DISABLED] --- + + set GRADLE_OPTS=-Dorg.gradle.daemon=false +set CUDA_MODULE_LOADING=LAZY diff --git a/tools/ci_build/github/windows/setup_env_gpu.bat b/tools/ci_build/github/windows/setup_env_gpu.bat deleted file mode 100644 index 115a19b6f3a01..0000000000000 --- a/tools/ci_build/github/windows/setup_env_gpu.bat +++ /dev/null @@ -1,21 +0,0 @@ -REM Copyright (c) Microsoft Corporation. All rights reserved. -REM Licensed under the MIT License. 
- -if exist PATH=%AGENT_TEMPDIRECTORY%\v12.8\ ( - set PATH=%AGENT_TEMPDIRECTORY%\v12.8\bin;%AGENT_TEMPDIRECTORY%\v12.8\extras\CUPTI\lib64;%PATH% -) else ( - set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\extras\CUPTI\lib64;%PATH% -) -set PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.9.0.34.Windows10.x86_64.cuda-12.8\lib;%PATH% - -@REM The default version is still cuda v12.8, because set cuda v11.8 after it -set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\TensorRT-10.9.0.34.Windows10.x86_64.cuda-11.8\lib -if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ ( - set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64 -) else ( - set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\\extras\CUPTI\lib64 -) - - -set GRADLE_OPTS=-Dorg.gradle.daemon=false -set CUDA_MODULE_LOADING=LAZY diff --git a/tools/ci_build/github/windows/setup_env_trt.bat b/tools/ci_build/github/windows/setup_env_trt.bat deleted file mode 100644 index 6110249a9cde6..0000000000000 --- a/tools/ci_build/github/windows/setup_env_trt.bat +++ /dev/null @@ -1,11 +0,0 @@ -REM Copyright (c) Microsoft Corporation. All rights reserved. -REM Licensed under the MIT License. 
- -if exist PATH=%AGENT_TEMPDIRECTORY%\v12.8\ ( - set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v12.8\bin;%AGENT_TEMPDIRECTORY%\v12.8\extras\CUPTI\lib64 -) else ( - set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\extras\CUPTI\lib64 -) -set PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.9.0.34.Windows10.x86_64.cuda-12.8\lib;%PATH% -set GRADLE_OPTS=-Dorg.gradle.daemon=false -set CUDA_MODULE_LOADING=LAZY From ae5d920f397337f4a5d246b18d2e4b7bf3a311ec Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Fri, 17 Oct 2025 21:22:13 -0700 Subject: [PATCH 2/6] fix setup_env_cuda.bat --- .../github/windows/setup_env_cuda.bat | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/ci_build/github/windows/setup_env_cuda.bat b/tools/ci_build/github/windows/setup_env_cuda.bat index 00ded251b3ed9..c63360e86b8d5 100644 --- a/tools/ci_build/github/windows/setup_env_cuda.bat +++ b/tools/ci_build/github/windows/setup_env_cuda.bat @@ -1,27 +1,27 @@ -REM Copyright (c) Microsoft Corporation. All rights reserved. -REM Licensed under the MIT License. +@REM Copyright (c) Microsoft Corporation. All rights reserved. +@REM Licensed under the MIT License. -REM --- Setup CUDA 12.8 --- -REM Check if a local/agent-specific version exists +@REM --- Setup CUDA 12.8 --- +@REM Check if a local/agent-specific version exists if exist "%AGENT_TEMPDIRECTORY%\v12.8\" ( - echo Using CUDA 12.8 from AGENT_TEMPDIRECTORY. + echo "Using CUDA 12.8 from AGENT_TEMPDIRECTORY." set "PATH=%AGENT_TEMPDIRECTORY%\v12.8\bin;%AGENT_TEMPDIRECTORY%\v12.8\extras\CUPTI\lib64;%PATH%" ) else ( - echo Using system default CUDA 12.8. + echo "Using system default CUDA 12.8." 
set "PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\extras\CUPTI\lib64;%PATH%" ) -REM --- Setup TensorRT for CUDA 12.8 --- +@REM --- Setup TensorRT for CUDA 12.8 --- set "TRT_12_8_PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.9.0.34.Windows10.x86_64.cuda-12.8\lib" if exist "%TRT_12_8_PATH%\" ( - echo Adding TensorRT 10.9.0 (for CUDA 12.8) to PATH. + echo "Adding TensorRT 10.9.0 for CUDA 12.8 to PATH." set "PATH=%TRT_12_8_PATH%;%PATH%" ) else ( - echo Warning: TensorRT 10.9.0 directory not found at %TRT_12_8_PATH% + echo "Warning: TensorRT 10.9.0 directory not found at %TRT_12_8_PATH%" ) -REM --- [DISABLED] Setup for CUDA 13.0 --- +@REM --- [DISABLED] Setup for CUDA 13.0 --- @REM If uncommented, CUDA 13.0 would become the default, overriding 12.8. @REM set "TRT_13_0_PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.13.3.9.Windows.win10.cuda-13.0\lib" @REM if exist "%TRT_13_0_PATH%\" ( From 3d69e11e3764bbbcc4960e5303c9e942b4d2bb6d Mon Sep 17 00:00:00 2001 From: Tianlei WU Date: Mon, 20 Oct 2025 14:10:52 -0700 Subject: [PATCH 3/6] cuda version choices in windows CI --- .github/workflows/linux_cuda_ci.yml | 4 ++-- .github/workflows/linux_tensorrt_ci.yml | 4 ++-- .../c-api-noopenmp-packaging-pipelines.yml | 14 ++++++----- ...-gpu-tensorrt-cuda-minimal-ci-pipeline.yml | 2 ++ .../azure-pipelines/post-merge-jobs.yml | 12 ++++++++-- .../templates/jobs/set-winenv.yml | 10 ++++---- .../templates/jobs/win-ci-prebuild-steps.yml | 9 +++++++- .../templates/jobs/win-ci-vs-2022-job.yml | 12 ++++++++-- .../win-gpu-doc-gen-ci-pipeline.yml | 10 +++++++- ...-gpu-tensorrt-cuda-minimal-ci-pipeline.yml | 10 ++++++-- ...etup_env_cuda.bat => setup_env_cuda12.bat} | 19 ++------------- .../github/windows/setup_env_cuda13.bat | 23 +++++++++++++++++++ 12 files changed, 89 insertions(+), 40 deletions(-) rename tools/ci_build/github/windows/{setup_env_cuda.bat => setup_env_cuda12.bat} (54%) create mode 100644 
tools/ci_build/github/windows/setup_env_cuda13.bat diff --git a/.github/workflows/linux_cuda_ci.yml b/.github/workflows/linux_cuda_ci.yml index ae15d03b81853..f3376e3fde9f5 100644 --- a/.github/workflows/linux_cuda_ci.yml +++ b/.github/workflows/linux_cuda_ci.yml @@ -27,7 +27,7 @@ jobs: build_config: Release architecture: x64 dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda - docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1' + docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1' docker_image_repo: onnxruntimecuda12manylinuxbuild extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' @@ -55,7 +55,7 @@ jobs: with: dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild - build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1' + build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1' push: true azure-container-registry-name: onnxruntimebuildcache env: diff --git a/.github/workflows/linux_tensorrt_ci.yml b/.github/workflows/linux_tensorrt_ci.yml index f30964320e653..0809eb783c2e9 100644 --- a/.github/workflows/linux_tensorrt_ci.yml +++ b/.github/workflows/linux_tensorrt_ci.yml @@ -27,7 +27,7 @@ jobs: build_config: Release architecture: x64 
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda - docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' + docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' docker_image_repo: onnxruntimetensorrt86gpubuild extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' @@ -57,7 +57,7 @@ jobs: with: dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda image-name: ghcr.io/microsoft/onnxruntime/onnxruntimetensorrt86gpubuild - build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' + build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host' push: true azure-container-registry-name: onnxruntimebuildcache env: diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 399b44a7f3cb2..b4e6040731d5f 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml 
@@ -57,6 +57,13 @@ parameters: type: string default: 2.39.0.250926 +- name: CudaVersion + displayName: CUDA version + type: string + default: '12.8' + values: + - 12.8 + resources: repositories: - repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step @@ -70,11 +77,6 @@ resources: variables: - template: templates/common-variables.yml -- name: ReleaseVersionSuffix - value: '' -- name: win_trt_version - value: 12.8 - - name: win_trt_home value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda12 }} - name: win_cuda_home @@ -142,7 +144,7 @@ extends: - template: stages/nuget-combine-cuda-stage.yml parameters: - CudaVersion: 12.8 + CudaVersion: ${{ parameters.CudaVersion }} RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} win_trt_home: ${{ variables.win_trt_home }} diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index 829b44af32411..b36df5748f3b3 100644 --- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -34,6 +34,7 @@ parameters: default: '12.8' values: - 12.8 + - 13.0 variables: - template: templates/common-variables.yml @@ -48,6 +49,7 @@ variables: ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.linux_trt_version_cuda12 }} + jobs: - job: Linux_Build timeoutInMinutes: 180 diff --git a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml index 81a7aad8d0088..71f8dd567793c 100644 --- a/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml +++ b/tools/ci_build/github/azure-pipelines/post-merge-jobs.yml @@ -5,6 +5,7 @@ parameters: default: '12.8' values: - 12.8 + - 13.0 
variables: - template: templates/common-variables.yml @@ -13,6 +14,11 @@ variables: value: ${{ variables.win_trt_folder_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.win_trt_folder_cuda12 }} + - name: setup_env_script + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: 'setup_env_cuda13.bat' + ${{ if eq(parameters.CudaVersion, '12.8') }}: + value: 'setup_env_cuda12.bat' stages: - template: templates/web-ci.yml @@ -219,8 +225,9 @@ stages: - template: templates/jobs/win-ci-vs-2022-job.yml parameters: BuildConfig: 'RelWithDebInfo' - EnvSetupScript: setup_env_cuda.bat + EnvSetupScript: '${{ variables.setup_env_script }}' buildArch: x64 + CudaVersion: ${{ parameters.CudaVersion }} additionalBuildFlags: --build_wheel --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}" --enable_cuda_profiling --enable_transformers_tool_test --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 @@ -242,8 +249,9 @@ stages: - template: templates/jobs/win-ci-vs-2022-job.yml parameters: BuildConfig: 'RelWithDebInfo' - EnvSetupScript: setup_env_cuda.bat + EnvSetupScript: '${{ variables.setup_env_script }}' buildArch: x64 + CudaVersion: ${{ parameters.CudaVersion }} additionalBuildFlags: --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder }}" --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 msbuildPlatform: x64 isX86: false diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml index 
307cec714a48b..00370eedb8d6e 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/set-winenv.yml @@ -7,7 +7,7 @@ parameters: - name: DownloadTRT type: boolean default: false - - name: CUDAVersion + - name: CudaVersion type: string default: '12.8' values: @@ -23,15 +23,15 @@ parameters: steps: - ${{ if eq(parameters.DownloadCUDA, 'true') }}: - powershell: | - azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.CUDAVersion }}" $(Agent.TempDirectory) - displayName: 'Download Primary CUDA SDK v${{ parameters.CUDAVersion }}' + azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ parameters.CudaVersion }}" $(Agent.TempDirectory) + displayName: 'Download Primary CUDA SDK v${{ parameters.CudaVersion }}' - ${{ if eq(parameters.DownloadTRT, 'true') }}: - - ${{ if eq(parameters.CUDAVersion, '12.8') }}: + - ${{ if eq(parameters.CudaVersion, '12.8') }}: - powershell: | azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda12 }}" $(Agent.TempDirectory) displayName: 'Download ${{ parameters.win_trt_folder_cuda12 }}' - - ${{ if eq(parameters.CUDAVersion, '13.0') }}: + - ${{ if eq(parameters.CudaVersion, '13.0') }}: - powershell: | azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/local/${{ parameters.win_trt_folder_cuda13 }}" $(Agent.TempDirectory) displayName: 'Download ${{ parameters.win_trt_folder_cuda13 }}' diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml index 39da103a2285b..ecafd578e1b6d 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-prebuild-steps.yml @@ -27,6 +27,13 @@ parameters: - name: Today 
type: string +- name: CudaVersion + type: string + default: '12.8' + values: + - 13.0 + - 12.8 + steps: - task: UsePythonVersion@0 inputs: @@ -69,6 +76,7 @@ steps: - template: set-winenv.yml parameters: + CudaVersion: ${{ parameters.CudaVersion }} EnvSetupScript: ${{parameters.EnvSetupScript}} DownloadCUDA: ${{parameters.DownloadCUDA}} DownloadTRT: ${{parameters.DownloadTRT}} @@ -94,4 +102,3 @@ steps: (C:\ProgramData\chocolatey\bin\cl.exe -?) -match 'Compiler Version' displayName: Install ccache and update PATH to use linked versions of gcc, cc, etc - diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml index 083381817818b..eb6492f779b94 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml @@ -55,6 +55,13 @@ parameters: type: number default: 0 +- name: CudaVersion + displayName: CUDA version + type: string + default: '12.8' + values: + - 12.8 + - 13.0 jobs: - job: build_${{ parameters.job_name_suffix }} @@ -85,6 +92,7 @@ jobs: - template: win-ci-prebuild-steps.yml parameters: + CudaVersion: ${{ parameters.CudaVersion }} EnvSetupScript: ${{parameters.EnvSetupScript}} ${{ if contains(parameters.additionalBuildFlags, 'use_cuda') }}: DownloadCUDA: true @@ -142,7 +150,7 @@ jobs: restoreSolution: '$(Build.SourcesDirectory)\packages.config' nugetConfigPath: '$(Build.SourcesDirectory)\tools\ci_build\github\azure-pipelines\nuget\nuget_config\nuget.config' restoreDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}' - + - ${{ if eq(parameters.RunOnnxRuntimeTests, true) }}: - powershell: | python.exe $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_csharp --parallel --use_binskim_compliant_compile_flags --cmake_generator 
"Visual Studio 17 2022" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }} @@ -160,7 +168,7 @@ jobs: Remove-Item "$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}" -Include "*.obj" -Recurse displayName: 'Build' - - script: + - script: python tools\ValidateNativeDelegateAttributes.py displayName: 'Validate C# native delegates' workingDirectory: '$(Build.SourcesDirectory)\csharp' diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml index 3724e2f6a2493..b75611e023c25 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-doc-gen-ci-pipeline.yml @@ -37,6 +37,13 @@ parameters: - 13.0 - 12.8 +variables: + - name: setup_env_script + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: 'setup_env_cuda13.bat' + ${{ if eq(parameters.CudaVersion, '12.8') }}: + value: 'setup_env_cuda12.bat' + stages: - stage: kernelDocumentation dependsOn: [] @@ -44,8 +51,9 @@ stages: - template: templates/jobs/win-ci-vs-2022-job.yml parameters: BuildConfig: 'RelWithDebInfo' - EnvSetupScript: setup_env_cuda.bat + EnvSetupScript: '${{ variables.setup_env_script }}' buildArch: x64 + CudaVersion: ${{ parameters.CudaVersion }} # note: need to specify `--gen_doc` when creating the build config so it has to be in additionalBuildFlags additionalBuildFlags: >- --gen_doc validate --skip_tests --build_wheel --use_dml --use_cuda diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml index 893de1598695c..459951893433e 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-cuda-minimal-ci-pipeline.yml @@ -34,6 +34,7 @@ parameters: default: '12.8' values: - 12.8 + - 
13.0 variables: - template: templates/common-variables.yml @@ -42,13 +43,17 @@ variables: value: ${{ variables.win_trt_folder_cuda13 }} ${{ if eq(parameters.CudaVersion, '12.8') }}: value: ${{ variables.win_trt_folder_cuda12 }} + - name: setup_env_script + ${{ if eq(parameters.CudaVersion, '13.0') }}: + value: 'setup_env_cuda13.bat' + ${{ if eq(parameters.CudaVersion, '12.8') }}: + value: 'setup_env_cuda12.bat' jobs: - job: 'build' pool: 'onnxruntime-Win2022-GPU-A10' variables: MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary' - EnvSetupScript: setup_env_cuda.bat skipComponentGovernanceDetection: true TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)] timeoutInMinutes: 150 @@ -57,7 +62,8 @@ jobs: steps: - template: templates/jobs/win-ci-prebuild-steps.yml parameters: - EnvSetupScript: $(EnvSetupScript) + CudaVersion: ${{ parameters.CudaVersion }} + EnvSetupScript: '${{ variables.setup_env_script }}' DownloadCUDA: true DownloadTRT: true BuildArch: 'x64' diff --git a/tools/ci_build/github/windows/setup_env_cuda.bat b/tools/ci_build/github/windows/setup_env_cuda12.bat similarity index 54% rename from tools/ci_build/github/windows/setup_env_cuda.bat rename to tools/ci_build/github/windows/setup_env_cuda12.bat index c63360e86b8d5..7a9f3181fb36f 100644 --- a/tools/ci_build/github/windows/setup_env_cuda.bat +++ b/tools/ci_build/github/windows/setup_env_cuda12.bat @@ -1,5 +1,5 @@ -@REM Copyright (c) Microsoft Corporation. All rights reserved. -@REM Licensed under the MIT License. +REM Copyright (c) Microsoft Corporation. All rights reserved. +REM Licensed under the MIT License. @REM --- Setup CUDA 12.8 --- @REM Check if a local/agent-specific version exists @@ -21,20 +21,5 @@ if exist "%TRT_12_8_PATH%\" ( ) -@REM --- [DISABLED] Setup for CUDA 13.0 --- -@REM If uncommented, CUDA 13.0 would become the default, overriding 12.8. 
-@REM set "TRT_13_0_PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.13.3.9.Windows.win10.cuda-13.0\lib" -@REM if exist "%TRT_13_0_PATH%\" ( -@REM set "PATH=%TRT_13_0_PATH%;%PATH%" -@REM ) -@REM -@REM if exist "%AGENT_TEMPDIRECTORY%\v13.0\" ( -@REM set "PATH=%AGENT_TEMPDIRECTORY%\v13.0\bin;%AGENT_TEMPDIRECTORY%\v13.0\extras\CUPTI\lib64;%PATH%" -@REM ) else ( -@REM set "PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\extras\CUPTI\lib64;%PATH%" -@REM ) -@REM --- [END DISABLED] --- - - set GRADLE_OPTS=-Dorg.gradle.daemon=false set CUDA_MODULE_LOADING=LAZY diff --git a/tools/ci_build/github/windows/setup_env_cuda13.bat b/tools/ci_build/github/windows/setup_env_cuda13.bat new file mode 100644 index 0000000000000..63c33cc233d60 --- /dev/null +++ b/tools/ci_build/github/windows/setup_env_cuda13.bat @@ -0,0 +1,23 @@ +REM Copyright (c) Microsoft Corporation. All rights reserved. +REM Licensed under the MIT License. + +@REM --- Setup for CUDA 13.0 --- +if exist "%AGENT_TEMPDIRECTORY%\v13.0\" ( + echo "Using CUDA 13.0 from AGENT_TEMPDIRECTORY." + set "PATH=%AGENT_TEMPDIRECTORY%\v13.0\bin;%AGENT_TEMPDIRECTORY%\v13.0\extras\CUPTI\lib64;%PATH%" +) else ( + echo "Using system default CUDA 13.0." + set "PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\extras\CUPTI\lib64;%PATH%" +) + +@REM --- Setup TensorRT for CUDA 13.0 --- +set "TRT_13_0_PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.13.3.9.Windows.win10.cuda-13.0\lib" +if exist "%TRT_13_0_PATH%\" ( + echo "Adding TensorRT 10.13.3.9 for CUDA 13.0 to PATH." 
+ set "PATH=%TRT_13_0_PATH%;%PATH%" +) else ( + echo "Warning: TensorRT 10.13.3.9 directory not found at %TRT_13_0_PATH%" +) + +set GRADLE_OPTS=-Dorg.gradle.daemon=false +set CUDA_MODULE_LOADING=LAZY From 63038839559812817b53d4d9fcf04f1b1ca8f7f9 Mon Sep 17 00:00:00 2001 From: Tianlei WU Date: Mon, 20 Oct 2025 15:21:35 -0700 Subject: [PATCH 4/6] patch cutlass --- cmake/external/cutlass.cmake | 2 +- cmake/patches/cutlass/cutlass_4.2.1.patch | 39 +++++++++++++++++++ .../cutlass/cutlass_4.2.1_maybe_unused.patch | 13 ------- 3 files changed, 40 insertions(+), 14 deletions(-) create mode 100644 cmake/patches/cutlass/cutlass_4.2.1.patch delete mode 100644 cmake/patches/cutlass/cutlass_4.2.1_maybe_unused.patch diff --git a/cmake/external/cutlass.cmake b/cmake/external/cutlass.cmake index 44b794d9e2f78..df554269dfc7f 100644 --- a/cmake/external/cutlass.cmake +++ b/cmake/external/cutlass.cmake @@ -4,7 +4,7 @@ onnxruntime_fetchcontent_declare( URL ${DEP_URL_cutlass} URL_HASH SHA1=${DEP_SHA1_cutlass} EXCLUDE_FROM_ALL -PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1_maybe_unused.patch +PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1.patch ) FetchContent_GetProperties(cutlass) diff --git a/cmake/patches/cutlass/cutlass_4.2.1.patch b/cmake/patches/cutlass/cutlass_4.2.1.patch new file mode 100644 index 0000000000000..3a3ec5ba103ef --- /dev/null +++ b/cmake/patches/cutlass/cutlass_4.2.1.patch @@ -0,0 +1,39 @@ +diff --git a/include/cute/layout.hpp b/include/cute/layout.hpp +index cb161369..2fdff179 100644 +--- a/include/cute/layout.hpp ++++ b/include/cute/layout.hpp +@@ -1487,7 +1487,7 @@ nullspace(Layout const& layout) + [[maybe_unused]] auto flat_stride = flatten(layout.stride()); + + // Select all indices corresponding to stride-0s +- auto iseq = cute::fold(make_seq>{}, cute::tuple<>{}, ++ [[maybe_unused]] auto iseq = cute::fold(make_seq>{}, 
cute::tuple<>{}, + [&](auto init, auto i){ + if constexpr (is_constant_v<0, decltype(get(flat_stride))>) { return append(init, i); } + else { return init; } +diff --git a/include/cutlass/exmy_base.h b/include/cutlass/exmy_base.h +index be207a49..6028e01d 100644 +--- a/include/cutlass/exmy_base.h ++++ b/include/cutlass/exmy_base.h +@@ -1021,18 +1021,18 @@ struct float_exmy_base + + /// Floating point conversion + CUTLASS_HOST_DEVICE +- explicit float_exmy_base(float x) { ++ explicit float_exmy_base(float x) { + storage = static_cast(this)->convert_from_float(x).storage; + } + + // Integer conversion + CUTLASS_HOST_DEVICE +- explicit float_exmy_base(int x) { ++ explicit float_exmy_base(int x) { + storage = static_cast(this)->convert_from_float(float(x)).storage; + } + + CUTLASS_HOST_DEVICE +- explicit float_exmy_base(unsigned x) { ++ explicit float_exmy_base(unsigned x) { + storage = static_cast(this)->convert_from_float(float(x)).storage; + } + diff --git a/cmake/patches/cutlass/cutlass_4.2.1_maybe_unused.patch b/cmake/patches/cutlass/cutlass_4.2.1_maybe_unused.patch deleted file mode 100644 index 03d5972823839..0000000000000 --- a/cmake/patches/cutlass/cutlass_4.2.1_maybe_unused.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/include/cute/layout.hpp b/include/cute/layout.hpp -index cb161369..2fdff179 100644 ---- a/include/cute/layout.hpp -+++ b/include/cute/layout.hpp -@@ -1487,7 +1487,7 @@ nullspace(Layout const& layout) - [[maybe_unused]] auto flat_stride = flatten(layout.stride()); - - // Select all indices corresponding to stride-0s -- auto iseq = cute::fold(make_seq>{}, cute::tuple<>{}, -+ [[maybe_unused]] auto iseq = cute::fold(make_seq>{}, cute::tuple<>{}, - [&](auto init, auto i){ - if constexpr (is_constant_v<0, decltype(get(flat_stride))>) { return append(init, i); } - else { return init; } From 3c5bc85211e4859026122f27079a0894ee3bc264 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 20 Oct 2025 20:15:13 -0700 Subject: [PATCH 5/6] update --- 
.github/workflows/linux_cuda_ci.yml | 2 +- .github/workflows/linux_tensorrt_ci.yml | 2 +- .../linux/docker/inference/aarch64/default/cpu/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linux_cuda_ci.yml b/.github/workflows/linux_cuda_ci.yml index f3376e3fde9f5..9ec5ea47fbf0c 100644 --- a/.github/workflows/linux_cuda_ci.yml +++ b/.github/workflows/linux_cuda_ci.yml @@ -99,5 +99,5 @@ jobs: build_config: Release mode: 'test' # Set mode to test execution_providers: 'cuda' - extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' + extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' diff --git a/.github/workflows/linux_tensorrt_ci.yml b/.github/workflows/linux_tensorrt_ci.yml index 0809eb783c2e9..064ad87794cdd 100644 --- a/.github/workflows/linux_tensorrt_ci.yml +++ b/.github/workflows/linux_tensorrt_ci.yml @@ -101,5 +101,5 @@ jobs: build_config: Release mode: 'test' # Set mode to test execution_providers: 'cuda tensorrt' - extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' + extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 
--cudnn_home=/usr/local/cuda-12.8 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON' python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH' diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile b/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile index e2ddd04ee4642..cef2d11780969 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/aarch64/default/cpu/Dockerfile @@ -2,7 +2,7 @@ # Licensed under the MIT License. # This file is used by Zip-Nuget Packaging NoContribOps Pipeline,Zip-Nuget-Java Packaging Pipeline -ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_almalinux8_gcc14_dotnet:20251017.1 +ARG BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_aarch64_almalinux8_gcc14_dotnet:20251008.2 FROM $BASEIMAGE ENV LANG=en_US.UTF-8 From 4f112ff6a68708ab305f089cf9c16e05837339ad Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 20 Oct 2025 21:00:49 -0700 Subject: [PATCH 6/6] gcc 14 --- .../linux/docker/inference/x86_64/default/cuda12/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile index 344b60af68cd4..44fcdb33c2d51 100644 --- a/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile +++ b/tools/ci_build/github/linux/docker/inference/x86_64/default/cuda12/Dockerfile @@ -36,7 +36,7 @@ fi ENV LANG=en_US.UTF-8 ENV LC_ALL=en_US.UTF-8 -ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-12/root/usr/bin/g++ +ENV CUDAHOSTCXX=/opt/rh/gcc-toolset-14/root/usr/bin/g++ ADD scripts /tmp/scripts RUN sed -i 's/enabled\s*=\s*1/enabled = 
1\nexclude=dotnet* aspnet* netstandard*/g' /etc/yum.repos.d/almalinux.repo ENV PATH=/usr/lib/jvm/msopenjdk-17/bin:$PATH