diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 0c8cf1cff..73a763fa7 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -7,21 +7,24 @@ queue: "juliagpu" cuda: "*" env: + # This is broken for OpenMPI 5 and Julia 1.12, so we stick with OpenMPI 4 OPENMPI_VER: "4.1" - OPENMPI_VER_FULL: "4.1.4" - UCX_VER: "1.12.1" + OPENMPI_VER_FULL: "4.1.8" + # OPENMPI_VER: "5.0" + # OPENMPI_VER_FULL: "5.0.9" + UCX_VER: "1.19.1" CCACHE_DIR: "/root/ccache" commands: | echo "--- Install packages" apt-get install --yes --no-install-recommends curl ccache - export PATH="/usr/lib/ccache/:$$PATH" + export PATH="/usr/lib/ccache:$$PATH" echo "--- Build UCX" curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz tar -zxf ucx.tar.gz pushd ucx-* ./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd @@ -30,7 +33,7 @@ tar -zxf openmpi.tar.gz pushd openmpi-$${OPENMPI_VER_FULL} ./configure --with-ucx=$$(realpath ../mpi-prefix) --with-cuda=/usr/local/cuda --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd @@ -48,12 +51,8 @@ matrix: setup: version: - - "1.6" - - "1.7" - - "1.8" - - "1.9" - "1.10" - - "1.11" + - "1.12" concurrency: 1 concurrency_group: mpi_cuda plugins: @@ -109,21 +108,25 @@ queue: "juliagpu" rocm: "*" env: + # This is broken for OpenMPI 5 and Julia 1.12. + # It is broken for OpenMPI 4 for all versions of Julia. 
So we use OpenMPI 5 and skip Julia 1.12 OPENMPI_VER: "5.0" - OPENMPI_VER_FULL: "5.0.3" - UCX_VER: "1.17.0" + OPENMPI_VER_FULL: "5.0.9" + # OPENMPI_VER: "4.1" + # OPENMPI_VER_FULL: "4.1.8" + UCX_VER: "1.19.1" CCACHE_DIR: "/root/ccache" commands: | echo "--- Install packages" apt-get install --yes --no-install-recommends curl ccache - export PATH="/usr/lib/ccache/:$$PATH" + export PATH="/usr/lib/ccache:$$PATH" echo "--- Build UCX" curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz tar -zxf ucx.tar.gz pushd ucx-* ./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd @@ -132,7 +135,7 @@ tar -zxf openmpi.tar.gz pushd openmpi-$${OPENMPI_VER_FULL} ./configure --with-ucx=$$(realpath ../mpi-prefix) --with-rocm --prefix=$$(realpath ../mpi-prefix) - make -j + make -j $(nproc) make install popd @@ -152,6 +155,14 @@ version: - "1.10" - "1.11" + # + # Skip the test with Julia 1.12 because it segfaults while installing packages: + # [amdgpu1:516 :0:516] Caught signal 11 (Segmentation fault: invalid permissions for mapped object at address 0x7ee1a9ec5000) + # ==== backtrace (tid: 516) ==== + # 0 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(ucs_handle_error+0x2e4) [0x7ee18c9bc4d4] + # 1 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(+0x3b6ca) [0x7ee18c9bc6ca] + # + # - "1.12" concurrency: 1 concurrency_group: mpi_rocm plugins: diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index fd793e138..d52b56c50 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -33,19 +33,19 @@ jobs: strategy: matrix: os: - - macos-13 + - macos-15-intel - ubuntu-latest - windows-latest julia_version: - - "1.6" + - "min" - "1" - "nightly" julia_arch: [x64, x86] exclude: - - os: macos-13 + - os: macos-15-intel julia_arch: x86 include: - - os: 
macos-14 + - os: macos-15 julia_arch: "aarch64" julia_version: "1" @@ -88,10 +88,10 @@ jobs: strategy: matrix: os: - - macos-13 + - macos-15-intel - ubuntu-latest julia_version: - - "1.6" + - "min" - "1" - "nightly" julia_arch: [x64] @@ -139,8 +139,8 @@ jobs: strategy: matrix: os: - - macos-13 - - macos-14 + - macos-15 + - macos-15-intel mpi: - mpich - openmpi @@ -150,9 +150,9 @@ - "x64" - "aarch64" exclude: - - os: macos-13 + - os: macos-15-intel julia_arch: "aarch64" - - os: macos-14 + - os: macos-15 julia_arch: "x64" fail-fast: false @@ -344,18 +344,18 @@ jobs: strategy: matrix: os: - - macos-13 + - macos-15-intel - ubuntu-latest mpi: [mpitrampoline] julia_version: - - "1.6" + - "min" - "1" - "nightly" julia_arch: - x64 - x86 exclude: - - os: macos-13 + - os: macos-15-intel julia_arch: x86 fail-fast: false @@ -556,6 +556,8 @@ MV2_SMP_USE_CMA: 0 # Work around issue with affinity not set. Ref: # https://github.com/JuliaParallel/MPI.jl/pull/810#issuecomment-1920255386 + # MVAPICH 2 and 3 use different environment variables; set both. + MV2_ENABLE_AFFINITY: 0 MVP_ENABLE_AFFINITY: 0 steps: diff --git a/Project.toml b/Project.toml index 7e3d66252..ccbddc9db 100644 --- a/Project.toml +++ b/Project.toml @@ -33,7 +33,7 @@ PrecompileTools = "1.0.1" Requires = "~0.5, 1.0" Serialization = "1" Sockets = "1" -julia = "1.6" +julia = "1.10" [extensions] AMDGPUExt = "AMDGPU" diff --git a/test/runtests.jl b/test/runtests.jl index 74530018d..30d27b674 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -112,6 +112,18 @@ testfiles = sort(filter(istest, readdir(testdir))) """ exception=(e, catch_backtrace()) @test_broken false end + elseif f == "test_cooperative_wait.jl" && Sys.iswindows() + # This test is broken on Windows. We don't know why. + try + run(cmd()) + catch e + @error """ + $(f) tests failed. This may be because the Windows MPI implementation is quite old; + it appears unsupported and has not seen bug fixes for a long time. 
+ See the full error message for more details. Some messages may have been written above. + """ exception=(e, catch_backtrace()) + @test_broken false + end else # MPI_Reduce with MPICH 3.4.2 on macOS when root != 0 and # when recvbuf == C_NULL segfaults diff --git a/test/test_cooperative_wait.jl b/test/test_cooperative_wait.jl index 181b1f544..7bbb1b7f0 100644 --- a/test/test_cooperative_wait.jl +++ b/test/test_cooperative_wait.jl @@ -1,36 +1,42 @@ # tests for the various kinds of waits include("common.jl") -MPI.Init(threadlevel=:multiple) +provided = MPI.Init(threadlevel=:multiple) -myrank = MPI.Comm_rank(MPI.COMM_WORLD) -commsize = MPI.Comm_rank(MPI.COMM_WORLD) +if provided >= MPI.ThreadLevel(:multiple) -nsends = 2 -send_arr = [ArrayType{Int}([i]) for i = 1:nsends] -recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends] -synchronize() + myrank = MPI.Comm_rank(MPI.COMM_WORLD) + commsize = MPI.Comm_size(MPI.COMM_WORLD) -send_check = zeros(Int, nsends) -recv_check = zeros(Int, nsends) + nsends = 2 + send_arr = [ArrayType{Int}([i]) for i = 1:nsends] + recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends] + synchronize() -@sync for i = 1:nsends - Threads.@spawn begin - recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i) - wait(recv_req) - @test MPI.isnull(recv_req) - recv_check[i] += 1 - end - Threads.@spawn begin - send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i) - wait(send_req) - @test MPI.isnull(send_req) - send_check[i] += 1 + send_check = zeros(Int, nsends) + recv_check = zeros(Int, nsends) + + @sync for i = 1:nsends + Threads.@spawn begin + recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i) + wait(recv_req) + @test MPI.isnull(recv_req) + recv_check[i] += 1 + end + Threads.@spawn begin + send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i) + wait(send_req) + @test MPI.isnull(send_req) + send_check[i] += 1 + end end -end -@test recv_check == ones(Int, nsends) -@test send_check == 
ones(Int, nsends) + @test recv_check == ones(Int, nsends) + @test send_check == ones(Int, nsends) + @test all(Array(send_arr[i]) == [i] for i = 1:nsends) + @test all(Array(recv_arr[i]) == [i] for i = 1:nsends) + +end MPI.Barrier(MPI.COMM_WORLD) MPI.Finalize() diff --git a/test/test_io_shared.jl b/test/test_io_shared.jl index bf2c7f0b4..242108bd0 100644 --- a/test/test_io_shared.jl +++ b/test/test_io_shared.jl @@ -1,5 +1,29 @@ include("common.jl") +# Syncing parallel MPI I/O is a bit involved: +function sync(comm, fh) + # First ensure that all local changes are flushed ... + MPI.File.sync(fh) + # ... then wait for all other process to finish doing that ... + MPI.Barrier(comm) + # ... then make sure we see all changes that the other processes made. + MPI.File.sync(fh) +end + +# Find MPI vendor +library_version = MPI.Get_library_version() +# Peel off MPItrampoline if present +if startswith(library_version, "MPIwrapper ") + library_version = join(split(library_version, "\n")[2:end], "\n") +end +if startswith(library_version, "MPICH ") + vendor = :MPICH +elseif startswith(library_version, "Open MPI ") + vendor = :OpenMPI +else + vendor = nothing +end + MPI.Init() comm = MPI.COMM_WORLD @@ -7,8 +31,6 @@ rank = MPI.Comm_rank(comm) sz = MPI.Comm_size(comm) filename = MPI.bcast(tempname(), 0, comm) -MPI.Barrier(comm) - # Collective write fh = MPI.File.open(comm, filename, read=true, write=true, create=true) @test MPI.File.get_position_shared(fh) == 0 @@ -16,21 +38,15 @@ fh = MPI.File.open(comm, filename, read=true, write=true, create=true) if !MPI.File.get_atomicity(fh) MPI.File.set_atomicity(fh, true) end - @test MPI.File.get_atomicity(fh) - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync(comm, fh) header = "my header" if rank == 0 MPI.File.write_shared(fh, header) end - -# TODO: is there a better way to synchronise shared pointers? 
-MPI.Barrier(comm) -MPI.File.sync(fh) +sync(comm, fh) offset = MPI.File.get_position_shared(fh) @test offset == sizeof(header) @@ -38,52 +54,40 @@ byte_offset = MPI.File.get_byte_offset(fh, offset) @test byte_offset == offset MPI.File.set_view!(fh, byte_offset, MPI.Datatype(Int64), MPI.Datatype(Int64)) -MPI.Barrier(comm) -MPI.File.sync(fh) -@test MPI.File.get_position_shared(fh) == 0 - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync(comm, fh) +# https://github.com/JuliaParallel/MPI.jl/issues/555 +# https://github.com/JuliaParallel/MPI.jl/issues/879 +@test MPI.File.get_position_shared(fh) == 0 skip = Sys.isapple() || (vendor == :MPICH && Sys.isunix()) || Sys.iswindows() MPI.File.write_ordered(fh, fill(Int64(rank), rank+1)) -@test MPI.File.get_position_shared(fh) == sum(1:sz) +sync(comm, fh) +# https://github.com/JuliaParallel/MPI.jl/issues/879 +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.isapple() MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync(comm, fh) buf = zeros(Int64, rank+1) MPI.File.read_ordered!(fh, buf) @test buf == fill(Int64(rank), rank+1) +sync(comm, fh) -MPI.Barrier(comm) -MPI.File.sync(fh) -if Sys.iswindows() - # TODO: this has to be fixed: https://github.com/JuliaParallel/MPI.jl/issues/555 - @test_skip MPI.File.get_position_shared(fh) == sum(1:sz) -else - @test MPI.File.get_position_shared(fh) == sum(1:sz) -end +# https://github.com/JuliaParallel/MPI.jl/issues/555 +@test MPI.File.get_position_shared(fh) == sum(1:sz) skip = Sys.iswindows() MPI.File.set_view!(fh, 0, MPI.Datatype(UInt8), MPI.Datatype(UInt8)) -MPI.Barrier(comm) -MPI.File.sync(fh) +sync(comm, fh) MPI.File.seek_shared(fh, 0) @test MPI.File.get_position_shared(fh) == 0 - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync(comm, fh) if rank == sz-1 buf = Array{UInt8}(undef, sizeof(header)) MPI.File.read_shared!(fh, buf) @test String(buf) == header end - -MPI.Barrier(comm) -MPI.File.sync(fh) +sync(comm, fh) @test 
MPI.File.get_position_shared(fh) == sizeof(header)