Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/linux_cuda_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
build_config: Release
architecture: x64
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1'
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
docker_image_repo: onnxruntimecuda12manylinuxbuild
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
run_tests: false # <<< Do not run tests in this job
upload_build_output: true # <<< Upload the build/Release directory
Expand All @@ -55,7 +55,7 @@ jobs:
with:
dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1'
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
push: true
azure-container-registry-name: onnxruntimebuildcache
env:
Expand Down Expand Up @@ -99,5 +99,5 @@ jobs:
build_config: Release
mode: 'test' # Set mode to test
execution_providers: 'cuda'
extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
8 changes: 4 additions & 4 deletions .github/workflows/linux_tensorrt_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
build_config: Release
architecture: x64
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host'
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host'
docker_image_repo: onnxruntimetensorrt86gpubuild
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
run_tests: false # <<< Do not run tests in this job
upload_build_output: true # <<< Upload the build/Release directory
Expand Down Expand Up @@ -57,7 +57,7 @@ jobs:
with:
dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimetensorrt86gpubuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20250124.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host'
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1 --build-arg TRT_VERSION=10.9.0.34-1.cuda12.8 --network=host'
push: true
azure-container-registry-name: onnxruntimebuildcache
env:
Expand Down Expand Up @@ -101,5 +101,5 @@ jobs:
build_config: Release
mode: 'test' # Set mode to test
execution_providers: 'cuda tensorrt'
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.2 --cuda_home=/usr/local/cuda-12.2 --cudnn_home=/usr/local/cuda-12.2 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --use_tensorrt --tensorrt_home /usr --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=90 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
2 changes: 1 addition & 1 deletion cmake/external/cutlass.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ onnxruntime_fetchcontent_declare(
URL ${DEP_URL_cutlass}
URL_HASH SHA1=${DEP_SHA1_cutlass}
EXCLUDE_FROM_ALL
PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1_maybe_unused.patch
PATCH_COMMAND ${Patch_EXECUTABLE} --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/cutlass/cutlass_4.2.1.patch
)

FetchContent_GetProperties(cutlass)
Expand Down
39 changes: 39 additions & 0 deletions cmake/patches/cutlass/cutlass_4.2.1.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
diff --git a/include/cute/layout.hpp b/include/cute/layout.hpp
index cb161369..2fdff179 100644
--- a/include/cute/layout.hpp
+++ b/include/cute/layout.hpp
@@ -1487,7 +1487,7 @@ nullspace(Layout<Shape,Stride> const& layout)
[[maybe_unused]] auto flat_stride = flatten(layout.stride());

// Select all indices corresponding to stride-0s
- auto iseq = cute::fold(make_seq<rank_v<decltype(flat_stride)>>{}, cute::tuple<>{},
+ [[maybe_unused]] auto iseq = cute::fold(make_seq<rank_v<decltype(flat_stride)>>{}, cute::tuple<>{},
[&](auto init, auto i){
if constexpr (is_constant_v<0, decltype(get<i>(flat_stride))>) { return append(init, i); }
else { return init; }
diff --git a/include/cutlass/exmy_base.h b/include/cutlass/exmy_base.h
index be207a49..6028e01d 100644
--- a/include/cutlass/exmy_base.h
+++ b/include/cutlass/exmy_base.h
@@ -1021,18 +1021,18 @@ struct float_exmy_base

/// Floating point conversion
CUTLASS_HOST_DEVICE
- explicit float_exmy_base<T, Derived>(float x) {
+ explicit float_exmy_base(float x) {
storage = static_cast<Derived*>(this)->convert_from_float(x).storage;
}

// Integer conversion
CUTLASS_HOST_DEVICE
- explicit float_exmy_base<T, Derived>(int x) {
+ explicit float_exmy_base(int x) {
storage = static_cast<Derived*>(this)->convert_from_float(float(x)).storage;
}

CUTLASS_HOST_DEVICE
- explicit float_exmy_base<T, Derived>(unsigned x) {
+ explicit float_exmy_base(unsigned x) {
storage = static_cast<Derived*>(this)->convert_from_float(float(x)).storage;
}

13 changes: 0 additions & 13 deletions cmake/patches/cutlass/cutlass_4.2.1_maybe_unused.patch

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ stages:
extra_build_arg: ''
cmake_build_type: Release
cuda_version: 12.8
docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20250714.2
docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ parameters:
type: string
default: 2.39.0.250926

- name: CudaVersion
displayName: CUDA version
type: string
default: '12.8'
values:
- 12.8

resources:
repositories:
- repository: onnxruntime-inference-examples # The name used to reference this repository in the checkout step
Expand All @@ -70,11 +77,6 @@ resources:

variables:
- template: templates/common-variables.yml
- name: ReleaseVersionSuffix
value: ''
- name: win_trt_version
value: 12.8

- name: win_trt_home
value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda12 }}
- name: win_cuda_home
Expand Down Expand Up @@ -142,7 +144,7 @@ extends:

- template: stages/nuget-combine-cuda-stage.yml
parameters:
CudaVersion: 12.8
CudaVersion: ${{ parameters.CudaVersion }}
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }}
win_trt_home: ${{ variables.win_trt_home }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,20 @@ parameters:
default: '12.8'
values:
- 12.8
- 13.0

variables:
- template: templates/common-variables.yml
- name: ReleaseVersionSuffix
value: ''
- name: win_trt_home
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }}
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda13 }}
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: $(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda12 }}
- name: win_cuda_home
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: $(Agent.TempDirectory)\v11.8
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: $(Agent.TempDirectory)\v13.0
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: $(Agent.TempDirectory)\v12.8

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ variables:
- name: ReleaseVersionSuffix
value: ''
- name: win_cuda_home
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: $(Agent.TempDirectory)\v11.8
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: $(Agent.TempDirectory)\v13.0
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: $(Agent.TempDirectory)\v12.8

Expand Down
4 changes: 2 additions & 2 deletions tools/ci_build/github/azure-pipelines/jar_package_testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ stages:
DownloadTRT: true

- template: templates/setup-maven.yml

- task: Maven@4
displayName: 'Download Java Dependencies'
inputs:
Expand Down Expand Up @@ -105,7 +105,7 @@ stages:
- name: runCodesignValidationInjection
value: false
- name: docker_base_image
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1
timeoutInMinutes: 60
steps:
- checkout: self
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,22 @@ parameters:
default: '12.8'
values:
- 12.8
- 13.0

variables:
- template: templates/common-variables.yml
- name: docker_base_image
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20251008.2
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251017.1
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: ${{ variables.linux_trt_version_cuda11 }}
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: ${{ variables.linux_trt_version_cuda13 }}
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: ${{ variables.linux_trt_version_cuda12 }}


jobs:
- job: Linux_Build
timeoutInMinutes: 180
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ parameters:
SpecificArtifact: false
CustomOpArtifactName: 'onnxruntime-linux-x64'
BuildId: '0'
CudaVersion: '11.8'
CudaVersion: '12.8'
stages:
- stage: NuGet_Test_Linux_${{ parameters.StageSuffix }}${{ parameters.MoreSuffix }}
dependsOn:
Expand Down Expand Up @@ -41,7 +41,7 @@ stages:
- script: |
mv $(Pipeline.Workspace)/build/drop-signed-nuget-${{ parameters.ArtifactSuffix }} $(Build.BinariesDirectory)/nuget-artifact
mv $(Pipeline.Workspace)/build/${{ parameters.CustomOpArtifactName }} $(Build.BinariesDirectory)/testdata
- template: get-nuget-package-version-as-variable.yml
parameters:
Expand Down
16 changes: 12 additions & 4 deletions tools/ci_build/github/azure-pipelines/post-merge-jobs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@ parameters:
default: '12.8'
values:
- 12.8
- 13.0

variables:
- template: templates/common-variables.yml
- name: win_trt_folder
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: ${{ variables.win_trt_folder_cuda11 }}
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: ${{ variables.win_trt_folder_cuda13 }}
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: ${{ variables.win_trt_folder_cuda12 }}
- name: setup_env_script
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: 'setup_env_cuda13.bat'
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: 'setup_env_cuda12.bat'

stages:
- template: templates/web-ci.yml
Expand Down Expand Up @@ -219,8 +225,9 @@ stages:
- template: templates/jobs/win-ci-vs-2022-job.yml
parameters:
BuildConfig: 'RelWithDebInfo'
EnvSetupScript: setup_env_cuda.bat
EnvSetupScript: '${{ variables.setup_env_script }}'
buildArch: x64
CudaVersion: ${{ parameters.CudaVersion }}
additionalBuildFlags: --build_wheel --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}"
--enable_cuda_profiling --enable_transformers_tool_test
--use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
Expand All @@ -242,8 +249,9 @@ stages:
- template: templates/jobs/win-ci-vs-2022-job.yml
parameters:
BuildConfig: 'RelWithDebInfo'
EnvSetupScript: setup_env_trt.bat
EnvSetupScript: '${{ variables.setup_env_script }}'
buildArch: x64
CudaVersion: ${{ parameters.CudaVersion }}
additionalBuildFlags: --config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder }}" --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}" --use_vcpkg --use_vcpkg_ms_internal_asset_cache --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
msbuildPlatform: x64
isX86: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ stages:
machine_pool: 'Onnxruntime-Linux-GPU'
python_wheel_suffix: '_gpu'
timeout: 480
docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2
docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1
cuda_version: '12.8'

- stage: Republish_Wheels
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ jobs:
variables:
- template: ../../templates/common-variables.yml
- name: docker_base_image
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20251008.2
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251017.1
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20251008.2
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: ${{ variables.linux_trt_version_cuda11 }}
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: ${{ variables.linux_trt_version_cuda13 }}
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: ${{ variables.linux_trt_version_cuda12 }}
pool: ${{ parameters.machine_pool }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ stages:
variables:
- template: ../templates/common-variables.yml
- name: CUDA_VERSION_MAJOR
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: '11'
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: '13'
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: '12'
- name: CUDA_VERSION
value: ${{ parameters.CudaVersion }}
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: ${{ variables.linux_trt_version_cuda11 }}
${{ if eq(parameters.CudaVersion, '13.0') }}:
value: ${{ variables.linux_trt_version_cuda13 }}
${{ if eq(parameters.CudaVersion, '12.8') }}:
value: ${{ variables.linux_trt_version_cuda12 }}
steps:
Expand Down
Loading
Loading