Skip to content

Commit 9ed1e7d

Browse files
committed
test: fix flaky tests
1 parent 5ef1da4 commit 9ed1e7d

File tree

13 files changed

+370
-209
lines changed

13 files changed

+370
-209
lines changed

.github/workflows/codespell.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,6 @@ jobs:
1717

1818
steps:
1919
- name: Checkout
20-
uses: actions/checkout@v3
20+
uses: actions/checkout@v6
2121
- name: Codespell
2222
uses: codespell-project/actions-codespell@v2

.github/workflows/pyslurm.yml

Lines changed: 10 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -8,21 +8,21 @@ on:
88
workflow_dispatch:
99

1010
jobs:
11-
unit-tests:
11+
tests:
1212
runs-on: ubuntu-latest
1313
strategy:
1414
matrix:
1515
python-version: ["3.12"]
1616
fail-fast: false
1717
steps:
1818
- name: Checkout repository
19-
uses: actions/checkout@v4
19+
uses: actions/checkout@v6
2020

2121
- name: Pull Slurm container
2222
run: docker pull giovtorres/slurm-docker:25.11.2
2323

2424
- name: Start Slurm container
25-
run: docker compose -f docker-compose-github.yml up -d
25+
run: docker compose up -d
2626

2727
- name: Wait for Slurm services
2828
run: |
@@ -47,6 +47,13 @@ jobs:
4747
done
4848
docker exec slurmctl sinfo
4949
50+
- name: Tune Slurm config for tests
51+
run: |
52+
docker exec slurmctl bash -c "
53+
sed -i -E 's/CPUs=[0-9]+/CPUs=4/' /etc/slurm/slurm.conf
54+
scontrol reconfigure
55+
"
56+
5057
- name: Build and install PySlurm
5158
run: |
5259
docker exec slurmctl bash -c "
@@ -66,57 +73,6 @@ jobs:
6673
pytest tests/unit -v
6774
"
6875
69-
integration-tests:
70-
runs-on: ubuntu-latest
71-
needs: unit-tests
72-
strategy:
73-
matrix:
74-
python-version: ["3.12"]
75-
fail-fast: false
76-
steps:
77-
- name: Checkout repository
78-
uses: actions/checkout@v4
79-
80-
- name: Pull Slurm container
81-
run: docker pull giovtorres/slurm-docker:25.11.2
82-
83-
- name: Start Slurm container
84-
run: docker compose -f docker-compose-github.yml up -d
85-
86-
- name: Wait for Slurm services
87-
run: |
88-
echo "Waiting for Slurm controller..."
89-
for i in $(seq 1 30); do
90-
if docker exec slurmctl scontrol ping 2>/dev/null | grep -q "UP"; then
91-
echo "Slurm controller is ready"
92-
break
93-
fi
94-
echo "Attempt $i/30..."
95-
sleep 2
96-
done
97-
98-
echo "Waiting for compute nodes..."
99-
for i in $(seq 1 30); do
100-
if docker exec slurmctl sinfo -h -o "%T %D" 2>/dev/null | grep -q "idle"; then
101-
echo "Compute nodes are ready"
102-
break
103-
fi
104-
echo "Attempt $i/30..."
105-
sleep 2
106-
done
107-
docker exec slurmctl sinfo
108-
109-
- name: Build and install PySlurm
110-
run: |
111-
docker exec slurmctl bash -c "
112-
python3 -m venv /opt/pyslurm-venv
113-
source /opt/pyslurm-venv/bin/activate
114-
pip install -r /pyslurm/test_requirements.txt
115-
cd /pyslurm
116-
python setup.py build -j4
117-
python setup.py install
118-
"
119-
12076
- name: Run integration tests
12177
run: |
12278
docker exec slurmctl bash -c "

docker-compose-github.yml

Lines changed: 0 additions & 14 deletions
This file was deleted.
Lines changed: 52 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,20 +6,31 @@ VENV_PATH="/opt/pyslurm-venv"
66
BUILD_JOBS="${PYSLURM_BUILD_JOBS:-4}"
77

88
usage() {
9-
echo "Usage: $0 [-c container_name] [-j build_jobs] [-s]"
9+
echo "Usage: $0 [-c container_name] [-j build_jobs] [-s] [-C] [-u | -i]"
1010
echo " -c Container name (default: slurmctl)"
1111
echo " -j Parallel build jobs (default: 4)"
1212
echo " -s Skip build step (reuse existing install)"
13+
echo " -C Clean build (remove build artifacts before building)"
14+
echo " -u Run unit tests only"
15+
echo " -i Run integration tests only"
16+
echo ""
17+
echo "With no flags, runs both unit and integration tests."
1318
exit 1
1419
}
1520

1621
SKIP_BUILD=false
22+
CLEAN_BUILD=false
23+
RUN_UNIT=true
24+
RUN_INTEGRATION=true
1725

18-
while getopts ":c:j:sh" o; do
26+
while getopts ":c:j:sCuih" o; do
1927
case "${o}" in
2028
c) CONTAINER_NAME="${OPTARG}" ;;
2129
j) BUILD_JOBS="${OPTARG}" ;;
2230
s) SKIP_BUILD=true ;;
31+
C) CLEAN_BUILD=true ;;
32+
u) RUN_UNIT=true; RUN_INTEGRATION=false ;;
33+
i) RUN_UNIT=false; RUN_INTEGRATION=true ;;
2334
h) usage ;;
2435
*) usage ;;
2536
esac
@@ -63,21 +74,51 @@ for i in $(seq 1 30); do
6374
sleep 2
6475
done
6576

77+
# Ensure nodes have enough CPUs to avoid resource contention between tests
78+
echo "Tuning Slurm config for tests..."
79+
docker exec "$CONTAINER_NAME" bash -c "
80+
sed -i -E 's/CPUs=[0-9]+/CPUs=4/' /etc/slurm/slurm.conf
81+
scontrol reconfigure
82+
"
83+
6684
if [ "$SKIP_BUILD" = false ]; then
85+
if [ "$CLEAN_BUILD" = true ]; then
86+
echo "Cleaning build artifacts..."
87+
docker exec "$CONTAINER_NAME" bash -c "
88+
cd /pyslurm
89+
rm -rf build/ *.egg-info
90+
find pyslurm -name '*.so' -delete
91+
find pyslurm -name '*.c' -not -name '__init__.c' -delete 2>/dev/null || true
92+
"
93+
fi
94+
6795
echo "Building and installing PySlurm..."
96+
build_start=$(date +%s)
6897
docker exec "$CONTAINER_NAME" bash -c "
6998
python3 -m venv $VENV_PATH 2>/dev/null || true
7099
source $VENV_PATH/bin/activate
71-
pip install -q -r /pyslurm/test_requirements.txt
100+
pip install -q --disable-pip-version-check -r /pyslurm/test_requirements.txt
72101
cd /pyslurm
73-
python setup.py build -j$BUILD_JOBS
74-
python setup.py install
102+
pip install -v --disable-pip-version-check -e . --no-build-isolation --config-settings='--build-option=build_ext -j$BUILD_JOBS'
75103
"
104+
build_end=$(date +%s)
105+
echo "Build completed in $((build_end - build_start))s."
76106
fi
77107

78-
echo "Running integration tests..."
79-
docker exec "$CONTAINER_NAME" bash -c "
80-
source $VENV_PATH/bin/activate
81-
cd /pyslurm
82-
pytest tests/integration -v \"\$@\"
83-
" -- "$@"
108+
if [ "$RUN_UNIT" = true ]; then
109+
echo "Running unit tests..."
110+
docker exec "$CONTAINER_NAME" bash -c "
111+
source $VENV_PATH/bin/activate
112+
cd /pyslurm
113+
pytest tests/unit -v
114+
"
115+
fi
116+
117+
if [ "$RUN_INTEGRATION" = true ]; then
118+
echo "Running integration tests..."
119+
docker exec "$CONTAINER_NAME" bash -c "
120+
source $VENV_PATH/bin/activate
121+
cd /pyslurm
122+
pytest tests/integration -v
123+
"
124+
fi

tests/integration/conftest.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,38 @@
2020
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
2121

2222
import pytest
23-
from pyslurm import (
24-
Job,
25-
JobSubmitDescription,
26-
)
27-
from util import create_simple_job_desc
23+
from pyslurm import Job
24+
from util import create_simple_job_desc, wait_for_job_done
25+
26+
27+
TEARDOWN_TIMEOUT = 15
2828

2929

3030
@pytest.fixture
3131
def submit_job():
32+
"""Factory fixture that submits jobs and cleans them up after the test.
3233
34+
Teardown cancels all submitted jobs and waits for them to reach a
35+
terminal state, preventing resource contention between tests.
36+
"""
3337
jobs = []
38+
3439
def _job(script=None, **kwargs):
3540
job_desc = create_simple_job_desc(script, **kwargs)
3641
job = Job(job_desc.submit())
37-
3842
jobs.append(job)
3943
return job
4044

4145
yield _job
4246

4347
for j in jobs:
44-
j.cancel()
48+
try:
49+
j.cancel()
50+
except Exception:
51+
pass
52+
53+
for j in jobs:
54+
try:
55+
wait_for_job_done(j.id, timeout=TEARDOWN_TIMEOUT)
56+
except (TimeoutError, Exception):
57+
pass

0 commit comments

Comments
 (0)