Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
1e0a455
Update CI
eschnett Dec 12, 2025
c829ee1
CI: Update OpenMPI and UCX
eschnett Dec 12, 2025
2636fb8
Correct test_io_shared
eschnett Dec 12, 2025
980bcad
Debug test_cooperative_wait
eschnett Dec 12, 2025
6b39a68
Buildkite: Do not test Julia 1.11
eschnett Dec 12, 2025
82784fa
CI: Test Julia 1.6 again
eschnett Dec 12, 2025
ee8488b
test: Sync file
eschnett Dec 12, 2025
1158075
test: Handle MPI implementation that do not support threads
eschnett Dec 12, 2025
27f575c
test: Copy data to CPU before testing
eschnett Dec 12, 2025
17965c5
CI: Downgrade OpenMPI_jll for testing with Julia 1.6
eschnett Dec 12, 2025
86c2c42
test: Give Windows a chance for shared file operations
eschnett Dec 12, 2025
d17c9e9
CI: Downgrade OpenMPI_jll for testing with Julia 1.6
eschnett Dec 12, 2025
be0cfa9
Try fixing the Julia 1.12 rocm build
eschnett Dec 12, 2025
ad4dc47
Require Julia 1.10
eschnett Dec 12, 2025
ef0723b
test: Windows still fails for shared file operations
eschnett Dec 12, 2025
b40166a
CI: Play with OpenMPI versions
eschnett Dec 12, 2025
adce9eb
test: Play with MPI_Barrier
eschnett Dec 12, 2025
3e2b147
Buildkite: Disable broken Julia/OpenMPI versions
eschnett Dec 12, 2025
07abfec
test: Fiddle with shared I/O, again
eschnett Dec 12, 2025
07aa90d
Buildkite: Small fixes
eschnett Dec 12, 2025
e271193
test: Try to fix shared I/O for Apple
eschnett Dec 12, 2025
483678c
test: Give up on cooperative_wait for Windows
eschnett Dec 12, 2025
73bbdd7
CI: Clean up Julia version selection
eschnett Dec 12, 2025
7390a5b
test: Clean up test skipping
eschnett Dec 13, 2025
04002db
test: Update pointer to issue
eschnett Dec 13, 2025
42eadfa
CI: Correct environment variable name
eschnett Dec 13, 2025
04c8296
test: Skip another broken Windows test
eschnett Dec 13, 2025
0a4a970
Update test/test_io_shared.jl
eschnett Dec 16, 2025
f6f0b0b
CI: Revert change to env var name
eschnett Dec 16, 2025
94ab752
Try to fix test_io_shared
eschnett Dec 17, 2025
5deeb00
Correct function name
eschnett Dec 17, 2025
c9c8fff
test_io_shared: Test again
eschnett Dec 17, 2025
3dbc974
test_io_shared: Test again
eschnett Dec 17, 2025
4f33a8c
test_io_shared: Test again
eschnett Dec 17, 2025
6f6b492
test: Improve readability
eschnett Dec 19, 2025
79c385f
test_io_shared: Disable OpenMPI/Apple as well
eschnett Dec 19, 2025
cf00012
test_io_shared: Disable another test on Apple
eschnett Dec 19, 2025
eb69374
test_io_shared: Disable another test on Windows
eschnett Dec 19, 2025
32d35bb
test_io_shared: Disable a test on Linux
eschnett Dec 19, 2025
9fd579e
CI: Fix white space
eschnett Dec 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 26 additions & 15 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,24 @@
queue: "juliagpu"
cuda: "*"
env:
# This is broken for OpenMPI 5 and Julia 1.12, so we stick with OpenMPI 4
OPENMPI_VER: "4.1"
OPENMPI_VER_FULL: "4.1.4"
UCX_VER: "1.12.1"
OPENMPI_VER_FULL: "4.1.8"
# OPENMPI_VER: "5.0"
# OPENMPI_VER_FULL: "5.0.9"
UCX_VER: "1.19.1"
CCACHE_DIR: "/root/ccache"
commands: |
echo "--- Install packages"
apt-get install --yes --no-install-recommends curl ccache
export PATH="/usr/lib/ccache/:$$PATH"
export PATH="/usr/lib/ccache:$$PATH"

echo "--- Build UCX"
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
tar -zxf ucx.tar.gz
pushd ucx-*
./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix)
make -j
make -j $(nproc)
make install
popd

Expand All @@ -30,7 +33,7 @@
tar -zxf openmpi.tar.gz
pushd openmpi-$${OPENMPI_VER_FULL}
./configure --with-ucx=$$(realpath ../mpi-prefix) --with-cuda=/usr/local/cuda --prefix=$$(realpath ../mpi-prefix)
make -j
make -j $(nproc)
make install
popd

Expand All @@ -48,12 +51,8 @@
matrix:
setup:
version:
- "1.6"
- "1.7"
- "1.8"
- "1.9"
- "1.10"
- "1.11"
- "1.12"
concurrency: 1
concurrency_group: mpi_cuda
plugins:
Expand Down Expand Up @@ -109,21 +108,25 @@
queue: "juliagpu"
rocm: "*"
env:
# This is broken for OpenMPI 5 and Julia 1.12.
# It is broken for OpenMPI 4 for all versions of Julia. So we use OpenMPI 5 and skip Julia 1.12
OPENMPI_VER: "5.0"
OPENMPI_VER_FULL: "5.0.3"
UCX_VER: "1.17.0"
OPENMPI_VER_FULL: "5.0.9"
# OPENMPI_VER: "4.1"
# OPENMPI_VER_FULL: "4.1.8"
UCX_VER: "1.19.1"
CCACHE_DIR: "/root/ccache"
commands: |
echo "--- Install packages"
apt-get install --yes --no-install-recommends curl ccache
export PATH="/usr/lib/ccache/:$$PATH"
export PATH="/usr/lib/ccache:$$PATH"

echo "--- Build UCX"
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
tar -zxf ucx.tar.gz
pushd ucx-*
./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix)
make -j
make -j $(nproc)
make install
popd

Expand All @@ -132,7 +135,7 @@
tar -zxf openmpi.tar.gz
pushd openmpi-$${OPENMPI_VER_FULL}
./configure --with-ucx=$$(realpath ../mpi-prefix) --with-rocm --prefix=$$(realpath ../mpi-prefix)
make -j
make -j $(nproc)
make install
popd

Expand All @@ -152,6 +155,14 @@
version:
- "1.10"
- "1.11"
#
# Skip the test with Julia 1.12 because it segfaults while installing packages:
# [amdgpu1:516 :0:516] Caught signal 11 (Segmentation fault: invalid permissions for mapped object at address 0x7ee1a9ec5000)
# ==== backtrace (tid: 516) ====
# 0 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(ucs_handle_error+0x2e4) [0x7ee18c9bc4d4]
# 1 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(+0x3b6ca) [0x7ee18c9bc6ca]
#
# - "1.12"
concurrency: 1
concurrency_group: mpi_rocm
plugins:
Expand Down
28 changes: 15 additions & 13 deletions .github/workflows/UnitTests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@ jobs:
strategy:
matrix:
os:
- macos-13
- macos-15-intel
- ubuntu-latest
- windows-latest
julia_version:
- "1.6"
- "min"
- "1"
- "nightly"
julia_arch: [x64, x86]
exclude:
- os: macos-13
- os: macos-15-intel
julia_arch: x86
include:
- os: macos-14
- os: macos-15
julia_arch: "aarch64"
julia_version: "1"

Expand Down Expand Up @@ -88,10 +88,10 @@ jobs:
strategy:
matrix:
os:
- macos-13
- macos-15-intel
- ubuntu-latest
julia_version:
- "1.6"
- "min"
- "1"
- "nightly"
julia_arch: [x64]
Expand Down Expand Up @@ -139,8 +139,8 @@ jobs:
strategy:
matrix:
os:
- macos-13
- macos-14
- macos-15
- macos-15-intel
mpi:
- mpich
- openmpi
Expand All @@ -150,9 +150,9 @@ jobs:
- "x64"
- "aarch64"
exclude:
- os: macos-13
- os: macos-15-intel
julia_arch: "aarch64"
- os: macos-14
- os: macos-15
julia_arch: "x64"

fail-fast: false
Expand Down Expand Up @@ -344,18 +344,18 @@ jobs:
strategy:
matrix:
os:
- macos-13
- macos-15-intel
- ubuntu-latest
mpi: [mpitrampoline]
julia_version:
- "1.6"
- "min"
- "1"
- "nightly"
julia_arch:
- x64
- x86
exclude:
- os: macos-13
- os: macos-15-intel
julia_arch: x86

fail-fast: false
Expand Down Expand Up @@ -556,6 +556,8 @@ jobs:
MV2_SMP_USE_CMA: 0
# Work around issue with affinity not set. Ref:
# https://github.com/JuliaParallel/MPI.jl/pull/810#issuecomment-1920255386
# MVAPICH 2 and 3 use different environment variables; set both.
MV2_ENABLE_AFFINITY: 0
MVP_ENABLE_AFFINITY: 0

steps:
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ PrecompileTools = "1.0.1"
Requires = "~0.5, 1.0"
Serialization = "1"
Sockets = "1"
julia = "1.6"
julia = "1.10"

[extensions]
AMDGPUExt = "AMDGPU"
Expand Down
12 changes: 12 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,18 @@ testfiles = sort(filter(istest, readdir(testdir)))
""" exception=(e, catch_backtrace())
@test_broken false
end
elseif f == "test_cooperative_wait.jl" && Sys.iswindows()
# This test is broken on Windows. We don't know why.
try
run(cmd())
catch e
@error """
$(f) tests failed. This may be because the Windows MPI implementation is quite old;
it appears unsupported and has not seen bug fixes for a long time.
See the full error message for more details. Some messages may have been written above.
""" exception=(e, catch_backtrace())
@test_broken false
end
else
# MPI_Reduce with MPICH 3.4.2 on macOS when root != 0 and
# when recvbuf == C_NULL segfaults
Expand Down
54 changes: 30 additions & 24 deletions test/test_cooperative_wait.jl
Original file line number Diff line number Diff line change
@@ -1,36 +1,42 @@
# tests for the various kinds of waits
include("common.jl")

MPI.Init(threadlevel=:multiple)
provided = MPI.Init(threadlevel=:multiple)

myrank = MPI.Comm_rank(MPI.COMM_WORLD)
commsize = MPI.Comm_rank(MPI.COMM_WORLD)
if provided >= MPI.ThreadLevel(:multiple)

nsends = 2
send_arr = [ArrayType{Int}([i]) for i = 1:nsends]
recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends]
synchronize()
myrank = MPI.Comm_rank(MPI.COMM_WORLD)
commsize = MPI.Comm_size(MPI.COMM_WORLD)

send_check = zeros(Int, nsends)
recv_check = zeros(Int, nsends)
nsends = 2
send_arr = [ArrayType{Int}([i]) for i = 1:nsends]
recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends]
synchronize()

@sync for i = 1:nsends
Threads.@spawn begin
recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i)
wait(recv_req)
@test MPI.isnull(recv_req)
recv_check[i] += 1
end
Threads.@spawn begin
send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i)
wait(send_req)
@test MPI.isnull(send_req)
send_check[i] += 1
send_check = zeros(Int, nsends)
recv_check = zeros(Int, nsends)

@sync for i = 1:nsends
Threads.@spawn begin
recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i)
wait(recv_req)
@test MPI.isnull(recv_req)
recv_check[i] += 1
end
Threads.@spawn begin
send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i)
wait(send_req)
@test MPI.isnull(send_req)
send_check[i] += 1
end
end
end

@test recv_check == ones(Int, nsends)
@test send_check == ones(Int, nsends)
@test recv_check == ones(Int, nsends)
@test send_check == ones(Int, nsends)
@test all(Array(send_arr[i]) == [i] for i = 1:nsends)
@test all(Array(recv_arr[i]) == [i] for i = 1:nsends)

end

MPI.Barrier(MPI.COMM_WORLD)
MPI.Finalize()
Expand Down
Loading
Loading