diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3411db6c..16c65f2c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,8 +1,12 @@
 name: CI
 
-on: [pull_request, push]
+on:
+  pull_request:
+  push:
+    branches:
+      - master
 
-# Cancel a job if there's a new on on the same branch started.
+# Cancel a job if there's a new one on the same branch started.
 # Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
 concurrency:
   group: ${{ github.ref }}
@@ -14,8 +18,7 @@ env:
   # Faster crates.io index checkout.
   CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
   RUST_LOG: debug
-  # Build the kernel only for the single architecture . This should reduce
-  # the overall compile-time significantly.
+  # Build the kernel only for the single architecture. This should reduce the overall compile-time significantly.
   EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
   BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
   NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
@@ -27,7 +30,9 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install required packages
-        run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
+        run: |
+          sudo apt-get update
+          sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
       - name: Install cargo clippy
         run: rustup component add clippy
       - name: Run cargo clippy
@@ -44,12 +49,31 @@ jobs:
         run: cargo fmt --all -- --check
 
   test:
-    runs-on: ubuntu-24.04
+    runs-on: ['self-hosted', 'linux', 'x64', '2xlarge+gpu']
     name: Test
     steps:
       - uses: actions/checkout@v4
+      # TODO: Move the driver installation to the AMI.
+      # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/install-nvidia-driver.html
+      # https://www.nvidia.com/en-us/drivers/
+      - name: Install CUDA drivers
+        run: |
+          # --fail: exit non-zero on an HTTP error instead of saving the error page as the .deb.
+          curl --fail --location --output nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb https://us.download.nvidia.com/tesla/570.148.08/nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
+          sudo dpkg -i nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
+          sudo cp /var/nvidia-driver-local-repo-ubuntu2404-570.148.08/nvidia-driver-local-*-keyring.gpg /usr/share/keyrings/
+          sudo apt-get update
+          sudo apt-get install --no-install-recommends --yes cuda-drivers
+          rm nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
       - name: Install required packages
-        run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
-      # In case no GPUs are available, it's using the CPU fallback.
+        run: |
+          sudo apt-get update
+          sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
+      # TODO: Remove this and other rust installation directives from jobs running
+      # on self-hosted runners (presumably once Rust is provided by the AMI; confirm).
+      - uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
+        with:
+          # Quoted so YAML keeps the version a string (an unquoted 1.90 would load as 1.9).
+          toolchain: '1.83'
       - name: Test
         run: cargo test --verbose