From 1e0a4557bab29d039d9e07274796aa4f81a03c5a Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 11:23:03 -0500 Subject: [PATCH 01/40] Update CI --- .github/workflows/UnitTests.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index fd793e138..2718a99b7 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -33,19 +33,19 @@ jobs: strategy: matrix: os: - - macos-13 + - macos-15-intel - ubuntu-latest - windows-latest julia_version: - - "1.6" + - "1.10" - "1" - "nightly" julia_arch: [x64, x86] exclude: - - os: macos-13 + - os: macos-15-intel julia_arch: x86 include: - - os: macos-14 + - os: macos-15 julia_arch: "aarch64" julia_version: "1" @@ -88,10 +88,10 @@ jobs: strategy: matrix: os: - - macos-13 + - macos-15-intel - ubuntu-latest julia_version: - - "1.6" + - "1.10" - "1" - "nightly" julia_arch: [x64] @@ -139,8 +139,8 @@ jobs: strategy: matrix: os: - - macos-13 - - macos-14 + - macos-15 + - macos-15-intel mpi: - mpich - openmpi @@ -150,9 +150,9 @@ jobs: - "x64" - "aarch64" exclude: - - os: macos-13 + - os: macos-15-intel julia_arch: "aarch64" - - os: macos-14 + - os: macos-15 julia_arch: "x64" fail-fast: false @@ -344,18 +344,18 @@ jobs: strategy: matrix: os: - - macos-13 + - macos-15-intel - ubuntu-latest mpi: [mpitrampoline] julia_version: - - "1.6" + - "1.10" - "1" - "nightly" julia_arch: - x64 - x86 exclude: - - os: macos-13 + - os: macos-15-intel julia_arch: x86 fail-fast: false From c829ee1217c28d8d3a47c7788f34e2452f558b75 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 12:00:24 -0500 Subject: [PATCH 02/40] CI: Update OpenMPI and UCX --- .buildkite/pipeline.yml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 0c8cf1cff..9766ee354 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -8,8 +8,8 @@ cuda: "*" env: OPENMPI_VER: "4.1" - OPENMPI_VER_FULL: "4.1.4" - UCX_VER: "1.12.1" + OPENMPI_VER_FULL: "4.1.8" + UCX_VER: "1.19.1" # "1.12.1" CCACHE_DIR: "/root/ccache" commands: | echo "--- Install packages" @@ -48,12 +48,9 @@ matrix: setup: version: - - "1.6" - - "1.7" - - "1.8" - - "1.9" - "1.10" - "1.11" + - "1.12" concurrency: 1 concurrency_group: mpi_cuda plugins: @@ -110,8 +107,8 @@ rocm: "*" env: OPENMPI_VER: "5.0" - OPENMPI_VER_FULL: "5.0.3" - UCX_VER: "1.17.0" + OPENMPI_VER_FULL: "5.0.9" + UCX_VER: "1.19.1" # "1.17.0" CCACHE_DIR: "/root/ccache" commands: | echo "--- Install packages" @@ -151,7 +148,7 @@ setup: version: - "1.10" - - "1.11" + - "1.12" concurrency: 1 concurrency_group: mpi_rocm plugins: From 2636fb8a11112970af179799e3cd1046f48e5614 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 13:43:51 -0500 Subject: [PATCH 03/40] Correct test_io_shared --- test/test_io_shared.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index bf2c7f0b4..ccfffd118 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -46,6 +46,7 @@ MPI.Barrier(comm) MPI.File.sync(fh) MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) +MPI.Barrier(comm) @test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.seek_shared(fh, 0) From 980bcad2b9d97e9da5c363f49a67c2b23801f592 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 13:44:05 -0500 Subject: [PATCH 04/40] Debug test_cooperative_wait --- test/test_cooperative_wait.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/test_cooperative_wait.jl b/test/test_cooperative_wait.jl index 181b1f544..9d3a0aa82 100644 --- a/test/test_cooperative_wait.jl +++ b/test/test_cooperative_wait.jl @@ -1,10 +1,14 @@ # tests for the various kinds of waits include("common.jl") -MPI.Init(threadlevel=:multiple) +provided = MPI.Init(threadlevel=:multiple) +if !(provided == MPI.ThreadLevel(:multiple)) + @show provided MPI.ThreadLevel(:multiple) +end +@assert provided == MPI.ThreadLevel(:multiple) myrank = MPI.Comm_rank(MPI.COMM_WORLD) -commsize = MPI.Comm_rank(MPI.COMM_WORLD) +commsize = MPI.Comm_size(MPI.COMM_WORLD) nsends = 2 send_arr = [ArrayType{Int}([i]) for i = 1:nsends] @@ -31,6 +35,7 @@ end @test recv_check == ones(Int, nsends) @test send_check == ones(Int, nsends) +@test all(recv_arr[i] == [i] for i = 1:nsends) MPI.Barrier(MPI.COMM_WORLD) MPI.Finalize() From 6b39a68c1b328e6c608de58561414f1b7868b835 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 13:44:23 -0500 Subject: [PATCH 05/40] Buildkite: Do not test Julia 1.11 --- .buildkite/pipeline.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 9766ee354..f4031c85f 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -49,7 +49,6 @@ setup: version: - "1.10" - - "1.11" - "1.12" concurrency: 1 concurrency_group: mpi_cuda From 82784fa314c2bdc237d8b14beb2b78a56cf8b0d4 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 13:47:40 -0500 Subject: [PATCH 06/40] CI: Test Julia 1.6 again --- .github/workflows/UnitTests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 2718a99b7..3a0c095e5 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -37,6 +37,7 @@ jobs: - ubuntu-latest - windows-latest julia_version: + - "1.6" - "1.10" - "1" - "nightly" @@ -91,6 +92,7 @@ jobs: - macos-15-intel - ubuntu-latest julia_version: + - "1.6" - "1.10" - "1" - "nightly" @@ -348,6 +350,7 @@ jobs: - ubuntu-latest mpi: [mpitrampoline] julia_version: + - "1.6" - "1.10" - "1" - "nightly" From ee8488bd1edfb7916c0e41037a3fe7ee74d9dc20 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 14:43:08 -0500 Subject: [PATCH 07/40] test: Sync file --- test/test_io_shared.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index ccfffd118..ef026a056 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -47,6 +47,7 @@ MPI.File.sync(fh) MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) MPI.Barrier(comm) +MPI.File.sync(fh) @test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.seek_shared(fh, 0) From 1158075fbee99620f27cc2984a52de30a546b877 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 14:43:24 -0500 Subject: [PATCH 08/40] test: Handle MPI implementation that do not support threads --- test/test_cooperative_wait.jl | 58 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/test/test_cooperative_wait.jl b/test/test_cooperative_wait.jl index 9d3a0aa82..72524d002 100644 --- a/test/test_cooperative_wait.jl +++ b/test/test_cooperative_wait.jl @@ -2,41 +2,41 @@ include("common.jl") provided = MPI.Init(threadlevel=:multiple) -if !(provided == MPI.ThreadLevel(:multiple)) - @show provided MPI.ThreadLevel(:multiple) -end -@assert provided == MPI.ThreadLevel(:multiple) -myrank = MPI.Comm_rank(MPI.COMM_WORLD) -commsize = MPI.Comm_size(MPI.COMM_WORLD) +if provided >= MPI.ThreadLevel(:multiple) -nsends = 2 -send_arr = [ArrayType{Int}([i]) for i = 1:nsends] -recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends] -synchronize() + myrank = MPI.Comm_rank(MPI.COMM_WORLD) + commsize = MPI.Comm_size(MPI.COMM_WORLD) -send_check = zeros(Int, nsends) -recv_check = zeros(Int, nsends) + nsends = 2 + send_arr = [ArrayType{Int}([i]) for i = 1:nsends] + recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends] + synchronize() -@sync for i = 1:nsends - Threads.@spawn begin - recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i) - wait(recv_req) - @test MPI.isnull(recv_req) - recv_check[i] += 1 - end - Threads.@spawn begin - send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i) - wait(send_req) - @test MPI.isnull(send_req) - send_check[i] += 1 + send_check = zeros(Int, nsends) + recv_check = zeros(Int, nsends) + + @sync for i = 1:nsends + Threads.@spawn begin + recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i) + wait(recv_req) + @test MPI.isnull(recv_req) + recv_check[i] += 1 + end + Threads.@spawn begin + send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i) + wait(send_req) + @test MPI.isnull(send_req) + send_check[i] += 1 + end end -end -@test recv_check == ones(Int, nsends) -@test send_check == ones(Int, nsends) -@test all(recv_arr[i] == [i] for i = 1:nsends) + @test recv_check == ones(Int, nsends) + @test send_check == ones(Int, nsends) + @test all(recv_arr[i] == [i] for i = 1:nsends) + +end -MPI.Barrier(MPI.COMM_WORLD) +# MPI.Barrier(MPI.COMM_WORLD) MPI.Finalize() @test MPI.Finalized() From 27f575c29875d4ee47ed50c1137b86551c911c01 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 15:01:49 -0500 Subject: [PATCH 09/40] test: Copy data to CPU before testing --- test/test_cooperative_wait.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_cooperative_wait.jl b/test/test_cooperative_wait.jl index 72524d002..1340e127c 100644 --- a/test/test_cooperative_wait.jl +++ b/test/test_cooperative_wait.jl @@ -33,7 +33,8 @@ if provided >= MPI.ThreadLevel(:multiple) @test recv_check == ones(Int, nsends) @test send_check == ones(Int, nsends) - @test all(recv_arr[i] == [i] for i = 1:nsends) + @test all(Array(send_arr[i]) == [i] for i = 1:nsends) + @test all(Array(recv_arr[i]) == [i] for i = 1:nsends) end From 17965c5ecf8217263cd2586bfce4798799fc7e8f Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 15:02:08 -0500 Subject: [PATCH 10/40] CI: Downgrade OpenMPI_jll for testing with Julia 1.6 --- .github/workflows/UnitTests.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 3a0c095e5..730853844 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -89,7 +89,7 @@ jobs: strategy: matrix: os: - - macos-15-intel + - macos-15-intel - ubuntu-latest julia_version: - "1.6" @@ -129,6 +129,11 @@ jobs: shell: julia --color=yes --project=test {0} run: | using Pkg + if VERSION < v"1.10" + # OpenMPI_jll 5.0.9 has only been built for `libgfortran5`, + # and the Julia 1.6 we're using here uses `libgfortran4`. + Pkg.add(name="OpenMPI_jll", version="5.0.8") + end Pkg.develop(path="lib/MPIPreferences") using MPIPreferences MPIPreferences.use_jll_binary("OpenMPI_jll", export_prefs=true) From 86c2c427607b5e96d6195ec662d5fa1bb5fa7ac9 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 15:03:48 -0500 Subject: [PATCH 11/40] test: Give Windows a chance for shared file operations --- test/test_io_shared.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index ef026a056..c60caf666 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -62,12 +62,12 @@ MPI.File.read_ordered!(fh, buf) MPI.Barrier(comm) MPI.File.sync(fh) -if Sys.iswindows() - # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 - @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) -else +#TODO if Sys.iswindows() +#TODO # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 +#TODO @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) +#TODO else @test MPI.File.get_position_shared(fh) == sum(1:sz) -end +#TODO end MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) MPI.Barrier(comm) From d17c9e94420707783b5b8251c212e95ce693e6a1 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 15:14:20 -0500 Subject: [PATCH 12/40] CI: Downgrade OpenMPI_jll for testing with Julia 1.6 --- .github/workflows/UnitTests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 730853844..287b0d34f 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -129,15 +129,15 @@ jobs: shell: julia --color=yes --project=test {0} run: | using Pkg + Pkg.develop(path="lib/MPIPreferences") + using MPIPreferences + MPIPreferences.use_jll_binary("OpenMPI_jll", export_prefs=true) + rm("test/Manifest.toml") if VERSION < v"1.10" # OpenMPI_jll 5.0.9 has only been built for `libgfortran5`, # and the Julia 1.6 we're using here uses `libgfortran4`. Pkg.add(name="OpenMPI_jll", version="5.0.8") end - Pkg.develop(path="lib/MPIPreferences") - using MPIPreferences - MPIPreferences.use_jll_binary("OpenMPI_jll", export_prefs=true) - rm("test/Manifest.toml") - uses: julia-actions/julia-runtest@v1 From be0cfa9750c604c832633a5370d86b911bd55d56 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 16:12:53 -0500 Subject: [PATCH 13/40] Try fixing the Julia 1.12 rocm build --- .buildkite/pipeline.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index f4031c85f..4c3c44e14 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -107,7 +107,7 @@ env: OPENMPI_VER: "5.0" OPENMPI_VER_FULL: "5.0.9" - UCX_VER: "1.19.1" # "1.17.0" + UCX_VER: "1.18.1" # "1.19.1" # "1.17.0" CCACHE_DIR: "/root/ccache" commands: | echo "--- Install packages" @@ -147,6 +147,14 @@ setup: version: - "1.10" + - "1.11" + # + # Skip the test with Julia 1.12 because it segfaults while installing packages: + # [amdgpu1:516 :0:516] Caught signal 11 (Segmentation fault: invalid permissions for mapped object at address 0x7ee1a9ec5000) + # ==== backtrace (tid: 516) ==== + # 0 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(ucs_handle_error+0x2e4) [0x7ee18c9bc4d4] + # 1 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(+0x3b6ca) [0x7ee18c9bc6ca] + # - "1.12" concurrency: 1 concurrency_group: mpi_rocm From ad4dc47b8b6a3edc7add533e71e7895a6d589a57 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 16:13:06 -0500 Subject: [PATCH 14/40] Require Julia 1.10 --- .github/workflows/UnitTests.yml | 11 +++-------- Project.toml | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 287b0d34f..04d81abee 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -37,7 +37,7 @@ jobs: - ubuntu-latest - windows-latest julia_version: - - "1.6" + # - "1.6" - "1.10" - "1" - "nightly" @@ -92,7 +92,7 @@ jobs: - macos-15-intel - ubuntu-latest julia_version: - - "1.6" + # - "1.6" - "1.10" - "1" - "nightly" @@ -133,11 +133,6 @@ jobs: using MPIPreferences MPIPreferences.use_jll_binary("OpenMPI_jll", export_prefs=true) rm("test/Manifest.toml") - if VERSION < v"1.10" - # OpenMPI_jll 5.0.9 has only been built for `libgfortran5`, - # and the Julia 1.6 we're using here uses `libgfortran4`. - Pkg.add(name="OpenMPI_jll", version="5.0.8") - end - uses: julia-actions/julia-runtest@v1 @@ -355,7 +350,7 @@ jobs: - ubuntu-latest mpi: [mpitrampoline] julia_version: - - "1.6" + # - "1.6" - "1.10" - "1" - "nightly" diff --git a/Project.toml b/Project.toml index 7e3d66252..ccbddc9db 100644 --- a/Project.toml +++ b/Project.toml @@ -33,7 +33,7 @@ PrecompileTools = "1.0.1" Requires = "~0.5, 1.0" Serialization = "1" Sockets = "1" -julia = "1.6" +julia = "1.10" [extensions] AMDGPUExt = "AMDGPU" From ef0723bfaaeabb11d991b2ddd39b8ece8f5df21e Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 16:13:24 -0500 Subject: [PATCH 15/40] test: Windows still fails for shared file operations --- test/test_io_shared.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index c60caf666..ef026a056 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -62,12 +62,12 @@ MPI.File.read_ordered!(fh, buf) MPI.Barrier(comm) MPI.File.sync(fh) -#TODO if Sys.iswindows() -#TODO # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 -#TODO @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) -#TODO else +if Sys.iswindows() + # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 + @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) +else @test MPI.File.get_position_shared(fh) == sum(1:sz) -#TODO end +end MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) MPI.Barrier(comm) From b40166a64b117d2292a4f2fa02994782d07f7c04 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 16:39:25 -0500 Subject: [PATCH 16/40] CI: Play with OpenMPI versions --- .buildkite/pipeline.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 4c3c44e14..74b74bb19 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -7,9 +7,11 @@ queue: "juliagpu" cuda: "*" env: - OPENMPI_VER: "4.1" - OPENMPI_VER_FULL: "4.1.8" - UCX_VER: "1.19.1" # "1.12.1" + #TODO OPENMPI_VER: "4.1" + #TODO OPENMPI_VER_FULL: "4.1.8" + OPENMPI_VER: "5.0" + OPENMPI_VER_FULL: "5.0.9" + UCX_VER: "1.19.1" CCACHE_DIR: "/root/ccache" commands: | echo "--- Install packages" @@ -105,9 +107,11 @@ queue: "juliagpu" rocm: "*" env: - OPENMPI_VER: "5.0" - OPENMPI_VER_FULL: "5.0.9" - UCX_VER: "1.18.1" # "1.19.1" # "1.17.0" + #TODO OPENMPI_VER: "5.0" + #TODO OPENMPI_VER_FULL: "5.0.9" + OPENMPI_VER: "4.1" + OPENMPI_VER_FULL: "4.1.8" + UCX_VER: "1.19.1" CCACHE_DIR: "/root/ccache" commands: | echo "--- Install packages" From adce9eb338f9c8c434d1bd7c5cd616961adea701 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 16:39:35 -0500 Subject: [PATCH 17/40] test: Play with MPI_Barrier --- test/test_cooperative_wait.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_cooperative_wait.jl b/test/test_cooperative_wait.jl index 1340e127c..7bbb1b7f0 100644 --- a/test/test_cooperative_wait.jl +++ b/test/test_cooperative_wait.jl @@ -38,6 +38,6 @@ if provided >= MPI.ThreadLevel(:multiple) end -# MPI.Barrier(MPI.COMM_WORLD) +MPI.Barrier(MPI.COMM_WORLD) MPI.Finalize() @test MPI.Finalized() From 3e2b1473def03c04233157e990758bea56eb913d Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 17:28:12 -0500 Subject: [PATCH 18/40] Buildkite: Disable broken Julia/OpenMPI versions --- .buildkite/pipeline.yml | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 74b74bb19..575c4edb0 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -7,10 +7,11 @@ queue: "juliagpu" cuda: "*" env: - #TODO OPENMPI_VER: "4.1" - #TODO OPENMPI_VER_FULL: "4.1.8" - OPENMPI_VER: "5.0" - OPENMPI_VER_FULL: "5.0.9" + # This is broken for OpenMPI 5 and Julia 1.12, so we stick with OpenMPI 4 + OPENMPI_VER: "4.1" + OPENMPI_VER_FULL: "4.1.8" + # OPENMPI_VER: "5.0" + # OPENMPI_VER_FULL: "5.0.9" UCX_VER: "1.19.1" CCACHE_DIR: "/root/ccache" commands: | @@ -107,10 +108,12 @@ queue: "juliagpu" rocm: "*" env: - #TODO OPENMPI_VER: "5.0" - #TODO OPENMPI_VER_FULL: "5.0.9" - OPENMPI_VER: "4.1" - OPENMPI_VER_FULL: "4.1.8" + # This is broken for OpenMPI 5 and Julia 1.12. + # It is broken for OpenMPI 4 for all versions of Julia. So we use OpenMPI 5 and skip Julia 1.12 + OPENMPI_VER: "5.0" + OPENMPI_VER_FULL: "5.0.9" + # OPENMPI_VER: "4.1" + # OPENMPI_VER_FULL: "4.1.8" UCX_VER: "1.19.1" CCACHE_DIR: "/root/ccache" commands: | @@ -159,7 +162,7 @@ # 0 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(ucs_handle_error+0x2e4) [0x7ee18c9bc4d4] # 1 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(+0x3b6ca) [0x7ee18c9bc6ca] # - - "1.12" + # - "1.12" concurrency: 1 concurrency_group: mpi_rocm plugins: From 07abfec2cc0455a3850202774b21983e633362f2 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 17:28:23 -0500 Subject: [PATCH 19/40] test: Fiddle with shared I/O, again --- test/test_io_shared.jl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index ef026a056..73d3ab929 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -48,6 +48,7 @@ MPI.File.sync(fh) MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) MPI.Barrier(comm) MPI.File.sync(fh) +MPI.Barrier(comm) @test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.seek_shared(fh, 0) @@ -62,12 +63,13 @@ MPI.File.read_ordered!(fh, buf) MPI.Barrier(comm) MPI.File.sync(fh) -if Sys.iswindows() - # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 - @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) -else +MPI.Barrier(comm) +#TODO if Sys.iswindows() +#TODO # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 +#TODO @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) +#TODO else @test MPI.File.get_position_shared(fh) == sum(1:sz) -end +#TODO end MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) MPI.Barrier(comm) From 07aa90d74a7b17c52cdfbf86ef2530213508fdfc Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 18:37:18 -0500 Subject: [PATCH 20/40] Buildkite: Small fixes --- .buildkite/pipeline.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 575c4edb0..73a763fa7 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -17,14 +17,14 @@ commands: | echo "--- Install packages" apt-get install --yes --no-install-recommends curl ccache - export PATH="/usr/lib/ccache/:$$PATH" + export PATH="/usr/lib/ccache:$$PATH" echo "--- Build UCX" curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz tar -zxf ucx.tar.gz pushd ucx-* ./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd @@ -33,7 +33,7 @@ tar -zxf openmpi.tar.gz pushd openmpi-$${OPENMPI_VER_FULL} ./configure --with-ucx=$$(realpath ../mpi-prefix) --with-cuda=/usr/local/cuda --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd @@ -119,14 +119,14 @@ commands: | echo "--- Install packages" apt-get install --yes --no-install-recommends curl ccache - export PATH="/usr/lib/ccache/:$$PATH" + export PATH="/usr/lib/ccache:$$PATH" echo "--- Build UCX" curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz tar -zxf ucx.tar.gz pushd ucx-* ./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd @@ -135,7 +135,7 @@ tar -zxf openmpi.tar.gz pushd openmpi-$${OPENMPI_VER_FULL} ./configure --with-ucx=$$(realpath ../mpi-prefix) --with-rocm --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd From e2711932a4b1f3845e0b281c407da19d9fc559f5 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 18:37:36 -0500 Subject: [PATCH 21/40] test: Try to fix shared I/O for Apple --- test/test_io_shared.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 73d3ab929..7c2e6cee6 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -48,8 +48,12 @@ MPI.File.sync(fh) MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) MPI.Barrier(comm) MPI.File.sync(fh) -MPI.Barrier(comm) -@test MPI.File.get_position_shared(fh) == sum(1:sz) +if Sys.isapple() + # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 + @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) +else + @test MPI.File.get_position_shared(fh) == sum(1:sz) +end MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 From 483678cee0a118b457a6c17f16bb6b6f86b1a4fe Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 18:37:53 -0500 Subject: [PATCH 22/40] test: Give up on cooperative_wait for Windows --- test/runtests.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 74530018d..30d27b674 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -112,6 +112,18 @@ testfiles = sort(filter(istest, readdir(testdir))) """ exception=(e, catch_backtrace()) @test_broken false end + elseif f == "test_cooperative_wait.jl" && Sys.iswindows() + # This test is broken on Windows. We don't know why. + try + run(cmd()) + catch e + @error """ + $(f) tests failed. Thsi may be because the Windows MPI implementation is quite old; + it appears unsupported and has not seen bug fixes for a long time. + See the full error message for more details. Some messages may have been written above. + """ exception=(e, catch_backtrace()) + @test_broken false + end else # MPI_Reduce with MPICH 3.4.2 on macOS when root != 0 and # when recvbuf == C_NULL segfaults From 73bbdd752fb1f24f50ee0c8b8c6a46f5660c4feb Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 18:38:56 -0500 Subject: [PATCH 23/40] CI: Clean up Julia version selection --- .github/workflows/UnitTests.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 04d81abee..d3ff8dbcf 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -37,8 +37,7 @@ jobs: - ubuntu-latest - windows-latest julia_version: - # - "1.6" - - "1.10" + - "min" - "1" - "nightly" julia_arch: [x64, x86] @@ -92,8 +91,7 @@ jobs: - macos-15-intel - ubuntu-latest julia_version: - # - "1.6" - - "1.10" + - "min" - "1" - "nightly" julia_arch: [x64] @@ -350,8 +348,7 @@ jobs: - ubuntu-latest mpi: [mpitrampoline] julia_version: - # - "1.6" - - "1.10" + - "min" - "1" - "nightly" julia_arch: From 7390a5b8b7fe977cd7f2a3c7f1338ee8fcf40031 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 12 Dec 2025 19:55:34 -0500 Subject: [PATCH 24/40] test: Clean up test skipping --- test/test_io_shared.jl | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 7c2e6cee6..109ce3f32 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -48,12 +48,8 @@ MPI.File.sync(fh) MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) MPI.Barrier(comm) MPI.File.sync(fh) -if Sys.isapple() - # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 - @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) -else - @test MPI.File.get_position_shared(fh) == sum(1:sz) -end +# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip=Sys.isapple() MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 @@ -68,12 +64,8 @@ MPI.File.read_ordered!(fh, buf) MPI.Barrier(comm) MPI.File.sync(fh) MPI.Barrier(comm) -#TODO if Sys.iswindows() -#TODO # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 -#TODO @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) -#TODO else - @test MPI.File.get_position_shared(fh) == sum(1:sz) -#TODO end +# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip=Sys.iswindows() MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) MPI.Barrier(comm) From 04002dbf677ad4489f5404d30df32afe0e9181d7 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Sat, 13 Dec 2025 10:52:15 -0500 Subject: [PATCH 25/40] test: Update pointer to issue --- test/test_io_shared.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 109ce3f32..9833bbb77 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -48,7 +48,7 @@ MPI.File.sync(fh) MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) MPI.Barrier(comm) MPI.File.sync(fh) -# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 +# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/879 @test MPI.File.get_position_shared(fh) == sum(1:sz) skip=Sys.isapple() MPI.File.seek_shared(fh, 0) @@ -64,7 +64,7 @@ MPI.File.read_ordered!(fh, buf) MPI.Barrier(comm) MPI.File.sync(fh) MPI.Barrier(comm) -# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 +# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/879 @test MPI.File.get_position_shared(fh) == sum(1:sz) skip=Sys.iswindows() MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) From 42eadfaee18dafe8b197367994ee6cd88bd9907b Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Sat, 13 Dec 2025 11:09:34 -0500 Subject: [PATCH 26/40] CI: Correct environment variable name --- .github/workflows/UnitTests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index d3ff8dbcf..e2c5c4ee4 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -556,7 +556,7 @@ jobs: MV2_SMP_USE_CMA: 0 # Work around issue with affinity not set. Ref: # https://github.com/JuliaParallel/MPI.jl/pull/810#issuecomment-1920255386 - MVP_ENABLE_AFFINITY: 0 + MV2_ENABLE_AFFINITY: 0 steps: - name: Checkout From 04c82960a32c4b163afdf2d1e283db5a75602b44 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Sat, 13 Dec 2025 11:20:44 -0500 Subject: [PATCH 27/40] test: Skip another broken Windows test --- test/test_io_shared.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 9833bbb77..37a568056 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -49,7 +49,7 @@ MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) MPI.Barrier(comm) MPI.File.sync(fh) # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/879 -@test MPI.File.get_position_shared(fh) == sum(1:sz) skip=Sys.isapple() +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() || Sys.iswindows() MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 @@ -65,7 +65,7 @@ MPI.Barrier(comm) MPI.File.sync(fh) MPI.Barrier(comm) # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/879 -@test MPI.File.get_position_shared(fh) == sum(1:sz) skip=Sys.iswindows() +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) MPI.Barrier(comm) From 0a4a970f8fca76db3f6a942fa58a6e113d1964f5 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 16 Dec 2025 12:43:34 -0500 Subject: [PATCH 28/40] Update test/test_io_shared.jl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mosè Giordano <765740+giordano@users.noreply.github.com> --- test/test_io_shared.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 37a568056..c68d76091 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -64,7 +64,7 @@ MPI.File.read_ordered!(fh, buf) MPI.Barrier(comm) MPI.File.sync(fh) MPI.Barrier(comm) -# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/879 +# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) From f6f0b0b90292bc0efdc574cab52637ff7057c147 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 16 Dec 2025 12:45:10 -0500 Subject: [PATCH 29/40] CI: Revert change to env var name --- .github/workflows/UnitTests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index e2c5c4ee4..d3ff8dbcf 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -556,7 +556,7 @@ jobs: MV2_SMP_USE_CMA: 0 # Work around issue with affinity not set. Ref: # https://github.com/JuliaParallel/MPI.jl/pull/810#issuecomment-1920255386 - MV2_ENABLE_AFFINITY: 0 + MVP_ENABLE_AFFINITY: 0 steps: - name: Checkout From 94ab7527138628b950a8c1c29263dd6ebaf9e5e8 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 17 Dec 2025 08:59:07 -0500 Subject: [PATCH 30/40] Try to fix test_io_shared --- test/test_io_shared.jl | 61 +++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index c68d76091..3a129ac44 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -7,7 +7,7 @@ rank = MPI.Comm_rank(comm) sz = MPI.Comm_size(comm) filename = MPI.bcast(tempname(), 0, comm) -MPI.Barrier(comm) +#TODO MPI.Barrier(comm) # Collective write fh = MPI.File.open(comm, filename, read=true, write=true, create=true) @@ -16,21 +16,25 @@ fh = MPI.File.open(comm, filename, read=true, write=true, create=true) if !MPI.File.get_atomicity(fh) MPI.File.set_atomicity(fh, true) end - @test MPI.File.get_atomicity(fh) -MPI.Barrier(comm) -MPI.File.sync(fh) +function sync() + # First ensure that all local changes are flushed ... + MPI.File.sync(fh) + # ... then wait for all other process to finish doing that ... + MPI_Barrier(comm) + # ... then make sure we see all change that the other processes made. + MPI.File.sync(fh) +end + +sync() header = "my header" if rank == 0 MPI.File.write_shared(fh, header) end - -# TODO: is there a better way to synchronise shared pointers? -MPI.Barrier(comm) -MPI.File.sync(fh) +sync() offset = MPI.File.get_position_shared(fh) @test offset == sizeof(header) @@ -38,52 +42,43 @@ byte_offset = MPI.File.get_byte_offset(fh, offset) @test byte_offset == offset MPI.File.set_view!(fh, byte_offset, MPI.Datatype(Int64), MPI.Datatype(Int64)) -MPI.Barrier(comm) -MPI.File.sync(fh) +sync() @test MPI.File.get_position_shared(fh) == 0 -MPI.Barrier(comm) -MPI.File.sync(fh) - MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) -MPI.Barrier(comm) -MPI.File.sync(fh) -# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/879 -@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() || Sys.iswindows() +sync() +#TODO # TODO: this has to be fixed: +#TODO # https://github.com/JuliaParallel/MPI.jl/issues/555, +#TODO # https://github.com/JuliaParallel/MPI.jl/issues/579 +#TODO @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() || Sys.iswindows() +@test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync() buf = zeros(Int64, rank+1) MPI.File.read_ordered!(fh, buf) @test buf == fill(Int64(rank), rank+1) +sync() -MPI.Barrier(comm) -MPI.File.sync(fh) -MPI.Barrier(comm) -# TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 -@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() +#TODO # TODO: this has to be fixed: +#TODO # https://github.com/JuliaParallel/MPI.jl/issues/555 +#TODO @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() +@test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) -MPI.Barrier(comm) -MPI.File.sync(fh) +sync() MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync() if rank == sz-1 buf = Array{UInt8}(undef, sizeof(header)) MPI.File.read_shared!(fh, buf) @test String(buf) == header end - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync() @test MPI.File.get_position_shared(fh) == sizeof(header) From 5deeb00aa21f00bcb829b746552ebca554e85564 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 17 Dec 2025 09:06:22 -0500 Subject: [PATCH 31/40] Correct function name --- test/test_io_shared.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 3a129ac44..d62bc97b0 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -22,7 +22,7 @@ function sync() # First ensure that all local changes are flushed ... MPI.File.sync(fh) # ... then wait for all other process to finish doing that ... - MPI_Barrier(comm) + MPI.Barrier(comm) # ... then make sure we see all change that the other processes made. MPI.File.sync(fh) end From c9c8ffff46009732b8c87cb929ee279b03ccb9fb Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 17 Dec 2025 09:31:33 -0500 Subject: [PATCH 32/40] test_io_shared: Test again --- test/test_io_shared.jl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index d62bc97b0..6aa050e0a 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -7,26 +7,23 @@ rank = MPI.Comm_rank(comm) sz = MPI.Comm_size(comm) filename = MPI.bcast(tempname(), 0, comm) -#TODO MPI.Barrier(comm) - # Collective write fh = MPI.File.open(comm, filename, read=true, write=true, create=true) @test MPI.File.get_position_shared(fh) == 0 -if !MPI.File.get_atomicity(fh) - MPI.File.set_atomicity(fh, true) -end -@test MPI.File.get_atomicity(fh) - function sync() # First ensure that all local changes are flushed ... MPI.File.sync(fh) # ... then wait for all other process to finish doing that ... MPI.Barrier(comm) - # ... then make sure we see all change that the other processes made. + # ... then make sure we see all changes that the other processes made. MPI.File.sync(fh) end +if !MPI.File.get_atomicity(fh) + MPI.File.set_atomicity(fh, true) +end +@test MPI.File.get_atomicity(fh) sync() header = "my header" @@ -51,6 +48,7 @@ sync() #TODO # https://github.com/JuliaParallel/MPI.jl/issues/555, #TODO # https://github.com/JuliaParallel/MPI.jl/issues/579 #TODO @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() || Sys.iswindows() +# TODO: still broken on Apple @test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.seek_shared(fh, 0) From 3dbc974a987f2849d8df5aa0e581354685aefab7 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 17 Dec 2025 09:52:18 -0500 Subject: [PATCH 33/40] test_io_shared: Test again --- .github/workflows/UnitTests.yml | 2 ++ test/test_io_shared.jl | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index d3ff8dbcf..b39123574 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -556,6 +556,8 @@ jobs: MV2_SMP_USE_CMA: 0 # Work around issue with affinity not set. Ref: # https://github.com/JuliaParallel/MPI.jl/pull/810#issuecomment-1920255386 + # MVAPICH 2 and 3 use different environment variables; set both. + MV2_ENABLE_AFFINITY: 0 MVP_ENABLE_AFFINITY: 0 steps: diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 6aa050e0a..bf896592d 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -48,7 +48,7 @@ sync() #TODO # https://github.com/JuliaParallel/MPI.jl/issues/555, #TODO # https://github.com/JuliaParallel/MPI.jl/issues/579 #TODO @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() || Sys.iswindows() -# TODO: still broken on Apple +# TODO: still broken on Apple with MPICH @test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.seek_shared(fh, 0) @@ -63,6 +63,7 @@ sync() #TODO # TODO: this has to be fixed: #TODO # https://github.com/JuliaParallel/MPI.jl/issues/555 #TODO @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() +# TODO: still broken on Windows with MPICH @test MPI.File.get_position_shared(fh) == sum(1:sz) MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) From 4f33a8c909c034d3f0a53c0bffd905f8e4feb4ec Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 17 Dec 2025 10:18:12 -0500 Subject: [PATCH 34/40] test_io_shared: Test again --- test/test_io_shared.jl | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index bf896592d..198991230 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -1,5 +1,19 @@ include("common.jl") +# Find MPI vendor +library_version = MPI.Get_library_version() +# Peel off MPItrampoline if present +if startswith(library_version, "MPIwrapper ") + library_version = join(split(library_version, "\n")[2:end], "\n") +end +if startswith(library_version, "MPICH ") + vendor = :MPICH +elseif startswith(library_version, "Open MPI ") + vendor = :OpenMPI +else + vendor = nothing +end + MPI.Init() comm = MPI.COMM_WORLD @@ -44,12 +58,8 @@ sync() MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) sync() -#TODO # TODO: this has to be fixed: -#TODO # https://github.com/JuliaParallel/MPI.jl/issues/555, -#TODO # https://github.com/JuliaParallel/MPI.jl/issues/579 -#TODO @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() || Sys.iswindows() -# TODO: still broken on Apple with MPICH -@test MPI.File.get_position_shared(fh) == sum(1:sz) +# https://github.com/JuliaParallel/MPI.jl/issues/879 +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = (vendor == :MPICH && Sys.isapple()) MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 @@ -60,11 +70,8 @@ MPI.File.read_ordered!(fh, buf) @test buf == fill(Int64(rank), rank+1) sync() -#TODO # TODO: this has to be fixed: -#TODO # https://github.com/JuliaParallel/MPI.jl/issues/555 -#TODO @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() -# TODO: still broken on Windows with MPICH -@test MPI.File.get_position_shared(fh) == sum(1:sz) +# https://github.com/JuliaParallel/MPI.jl/issues/555 +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) sync() From 6f6b49244b55c698f635d6582d8767d314bc6848 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 19 Dec 2025 09:45:01 -0500 Subject: [PATCH 35/40] test: Improve readability --- test/test_io_shared.jl | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 198991230..0571e5e97 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -1,5 +1,15 @@ include("common.jl") +# Syncing parallel MPI I/O is a bit involved: +function sync(comm, fh) + # First ensure that all local changes are flushed ... + MPI.File.sync(fh) + # ... then wait for all other process to finish doing that ... + MPI.Barrier(comm) + # ... then make sure we see all changes that the other processes made. + MPI.File.sync(fh) +end + # Find MPI vendor library_version = MPI.Get_library_version() # Peel off MPItrampoline if present @@ -25,27 +35,18 @@ filename = MPI.bcast(tempname(), 0, comm) fh = MPI.File.open(comm, filename, read=true, write=true, create=true) @test MPI.File.get_position_shared(fh) == 0 -function sync() - # First ensure that all local changes are flushed ... - MPI.File.sync(fh) - # ... then wait for all other process to finish doing that ... - MPI.Barrier(comm) - # ... then make sure we see all changes that the other processes made. - MPI.File.sync(fh) -end - if !MPI.File.get_atomicity(fh) MPI.File.set_atomicity(fh, true) end @test MPI.File.get_atomicity(fh) -sync() +sync(comm, fh) header = "my header" if rank == 0 MPI.File.write_shared(fh, header) end -sync() +sync(comm, fh) offset = MPI.File.get_position_shared(fh) @test offset == sizeof(header) @@ -53,38 +54,38 @@ byte_offset = MPI.File.get_byte_offset(fh, offset) @test byte_offset == offset MPI.File.set_view!(fh, byte_offset, MPI.Datatype(Int64), MPI.Datatype(Int64)) -sync() +sync(comm, fh) @test MPI.File.get_position_shared(fh) == 0 MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) -sync() +sync(comm, fh) # https://github.com/JuliaParallel/MPI.jl/issues/879 @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = (vendor == :MPICH && Sys.isapple()) MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 -sync() +sync(comm, fh) buf = zeros(Int64, rank+1) MPI.File.read_ordered!(fh, buf) @test buf == fill(Int64(rank), rank+1) -sync() +sync(comm, fh) # https://github.com/JuliaParallel/MPI.jl/issues/555 @test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) -sync() +sync(comm, fh) MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 -sync() +sync(comm, fh) if rank == sz-1 buf = Array{UInt8}(undef, sizeof(header)) MPI.File.read_shared!(fh, buf) @test String(buf) == header end -sync() +sync(comm, fh) @test MPI.File.get_position_shared(fh) == sizeof(header) From 79c385fab374b236174b9ca71189075189ddc1f0 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 19 Dec 2025 10:17:21 -0500 Subject: [PATCH 36/40] test_io_shared: Disable OpenMPI/Apple as well --- test/test_io_shared.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 0571e5e97..fe850234a 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -60,7 +60,7 @@ sync(comm, fh) MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) sync(comm, fh) # https://github.com/JuliaParallel/MPI.jl/issues/879 -@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = (vendor == :MPICH && Sys.isapple()) +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 From cf00012fde112a4c18e3c99fd9696a41373af1ed Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 19 Dec 2025 10:41:58 -0500 Subject: [PATCH 37/40] test_io_shared: Disable another test on Apple --- test/test_io_shared.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index fe850234a..de99e3377 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -55,7 +55,8 @@ byte_offset = MPI.File.get_byte_offset(fh, offset) MPI.File.set_view!(fh, byte_offset, MPI.Datatype(Int64), MPI.Datatype(Int64)) sync(comm, fh) -@test MPI.File.get_position_shared(fh) == 0 +# https://github.com/JuliaParallel/MPI.jl/issues/879 +@test MPI.File.get_position_shared(fh) == 0 skip = Sys.isapple() MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) sync(comm, fh) From eb69374f51047e0b129ad279e808743cc98ac283 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 19 Dec 2025 11:09:03 -0500 Subject: [PATCH 38/40] test_io_shared: Disable another test on Windows --- test/test_io_shared.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index de99e3377..49770d8dc 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -55,8 +55,9 @@ byte_offset = MPI.File.get_byte_offset(fh, offset) MPI.File.set_view!(fh, byte_offset, MPI.Datatype(Int64), MPI.Datatype(Int64)) sync(comm, fh) +# https://github.com/JuliaParallel/MPI.jl/issues/555 # https://github.com/JuliaParallel/MPI.jl/issues/879 -@test MPI.File.get_position_shared(fh) == 0 skip = Sys.isapple() +@test MPI.File.get_position_shared(fh) == 0 skip = Sys.isapple() || Sys.iswindows() MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) sync(comm, fh) From 32d35bb1039992e0a73dc749a0bb69fdcb25bc45 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 19 Dec 2025 11:51:20 -0500 Subject: [PATCH 39/40] test_io_shared: Disable a test on Linux --- test/test_io_shared.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index 49770d8dc..242108bd0 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -57,7 +57,7 @@ MPI.File.set_view!(fh, byte_offset, MPI.Datatype(Int64), MPI.Datatype(Int64)) sync(comm, fh) # https://github.com/JuliaParallel/MPI.jl/issues/555 # https://github.com/JuliaParallel/MPI.jl/issues/879 -@test MPI.File.get_position_shared(fh) == 0 skip = Sys.isapple() || Sys.iswindows() +@test MPI.File.get_position_shared(fh) == 0 skip = Sys.isapple() || (vendor == :MPICH && Sys.isunix()) || Sys.iswindows() MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) sync(comm, fh) From 9fd579e291d472a2f6a074034405c89ae33e10d3 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Fri, 19 Dec 2025 13:01:14 -0500 Subject: [PATCH 40/40] CI: Fix white space --- .github/workflows/UnitTests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index b39123574..d52b56c50 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -88,7 +88,7 @@ jobs: strategy: matrix: os: - - macos-15-intel + - macos-15-intel - ubuntu-latest julia_version: - "min"