Skip to content

Commit e1636b0

Browse files
committed
update unit tests to cover milestone data
1 parent 8770472 commit e1636b0

File tree

2 files changed

+142
-14
lines changed

2 files changed

+142
-14
lines changed

unit-tests/test_leaderboard_db.py

Lines changed: 123 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import copy
22
import dataclasses
33
import datetime
4+
import decimal
45
import subprocess
56
import time
7+
from pathlib import Path
8+
from unittest.mock import ANY
69

710
import pytest
811
from test_report import sample_compile_result, sample_run_result, sample_system_info
@@ -16,8 +19,14 @@
1619

1720
@pytest.fixture(scope="module")
1821
def docker_compose():
22+
tgt_path = Path.cwd()
23+
if tgt_path.name == "unit-tests":
24+
tgt_path = tgt_path.parent
25+
1926
"""Start a test database and run migrations"""
20-
subprocess.check_call(["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"])
27+
subprocess.check_call(
28+
["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=tgt_path
29+
)
2130

2231
try:
2332
# Wait for migrations to finish
@@ -26,6 +35,7 @@ def docker_compose():
2635
["docker", "compose", "-f", "docker-compose.test.yml", "ps", "-q", "migrate-test"],
2736
capture_output=True,
2837
text=True,
38+
cwd=tgt_path,
2939
)
3040

3141
if not result.stdout.strip(): # Container no longer exists
@@ -37,6 +47,7 @@ def docker_compose():
3747
["docker", "compose", "-f", "docker-compose.test.yml", "logs", "migrate-test"],
3848
capture_output=True,
3949
text=True,
50+
cwd=tgt_path,
4051
)
4152

4253
if "error" in logs.stdout.lower():
@@ -52,7 +63,9 @@ def docker_compose():
5263
ssl_mode="disable",
5364
)
5465
finally:
55-
subprocess.run(["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"])
66+
subprocess.run(
67+
["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=tgt_path
68+
)
5669

5770

5871
def _nuke_contents(db):
@@ -114,7 +127,7 @@ def _create_submission_run(
114127
):
115128
"""Creates a submission run with suitable default values"""
116129
db.create_submission_run(
117-
submission,
130+
submission=submission,
118131
start=start or datetime.datetime.now(tz=datetime.timezone.utc),
119132
end=end
120133
or (datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(seconds=10)),
@@ -268,9 +281,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard):
268281
with database as db:
269282
end_time = submit_time + datetime.timedelta(seconds=10)
270283
db.create_submission_run(
271-
sub_id,
272-
submit_time,
273-
end_time,
284+
submission=sub_id,
285+
start=submit_time,
286+
end=end_time,
274287
mode="test",
275288
secret=False,
276289
runner="A100",
@@ -282,9 +295,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard):
282295
# run ends after the contest deadline; this is valid
283296
end_time_2 = submit_time + datetime.timedelta(days=1, hours=1)
284297
db.create_submission_run(
285-
sub_id,
286-
submit_time,
287-
end_time_2,
298+
submission=sub_id,
299+
start=submit_time,
300+
end=end_time_2,
288301
mode="leaderboard",
289302
secret=True,
290303
runner="H100",
@@ -577,6 +590,107 @@ def test_leaderboard_update(database, task_directory):
577590
}
578591

579592

593+
def test_leaderboard_milestones(database, submit_leaderboard):
    """Milestones can be created, listed, and deleted for a leaderboard.

    Starts from a leaderboard with no milestones, adds two (one of them with
    an explicit ``exclude_gpus`` list), checks the rows returned by
    ``get_leaderboard_milestones``, and finally verifies that
    ``delete_milestones`` leaves the listing empty again.
    """
    with database as db:
        board_id = db.get_leaderboard_id("submit-leaderboard")

        # Nothing has been registered for this leaderboard yet.
        before = db.get_leaderboard_milestones(board_id)
        assert before == []

        # `created_at` is filled in at the DB level, so the expected rows
        # below can only match it with the `ANY` wildcard, not a fixed value.
        db.create_milestone(board_id, "Milestone", "sample code", "Test milestone")
        db.create_milestone(
            board_id,
            "Milestone2",
            "other code",
            "Second milestone",
            exclude_gpus=["T4"],
        )

        first_row = {
            "code": "sample code",
            "created_at": ANY,
            "description": "Test milestone",
            "exclude_gpus": [""],
            "id": 1,
            "name": "Milestone",
        }
        second_row = {
            "code": "other code",
            "created_at": ANY,
            "description": "Second milestone",
            "exclude_gpus": ["T4"],
            "id": 2,
            "name": "Milestone2",
        }
        listed = db.get_leaderboard_milestones(board_id)
        assert listed == [first_row, second_row]

        # Deleting by leaderboard id removes every milestone again.
        db.delete_milestones(board_id)
        after = db.get_leaderboard_milestones(board_id)
        assert after == []
628+
629+
630+
def test_leaderboard_milestone_runs(database, submit_leaderboard):
    """A run attached to a milestone can be stored, queried, and deleted.

    Creates one milestone, records a single "leaderboard"-mode run against it
    via ``create_submission_run(milestone=...)``, checks the record returned
    by ``get_runs_generic(milestone_id=...)``, and then verifies that
    ``delete_milestone_runs`` leaves no runs behind for that milestone.
    """
    with database as db:
        board_id = db.get_leaderboard_id("submit-leaderboard")
        milestone_id = db.create_milestone(
            board_id, "Milestone", "sample code", "Test milestone"
        )

        run_start = datetime.datetime.now(tz=datetime.timezone.utc)
        run_end = run_start + datetime.timedelta(seconds=10)
        db.create_submission_run(
            milestone=milestone_id,
            start=run_start,
            end=run_end,
            mode="leaderboard",
            secret=False,
            runner="A100",
            score=5,
            compilation=None,
            result=sample_run_result(),
            system=sample_system_info(),
        )

        # Expected stored form of the sample run; the integer score 5 is
        # expected back as a Decimal.
        expected_meta = {
            "command": "./test",
            "duration": 1.5,
            "exit_code": 0,
            "stderr": "",
            "stdout": "All tests passed",
            "success": True,
        }
        expected_result = {
            "test-count": "3",
            "test.0.message": "Addition works correctly",
            "test.0.spec": "Test addition",
            "test.0.status": "pass",
            "test.1.spec": "Test multiplication",
            "test.1.status": "pass",
            "test.2.error": "Division by zero",
            "test.2.spec": "Test division",
            "test.2.status": "fail",
        }
        expected_system = {
            "cpu": "Intel i9-12900K",
            "gpu": "NVIDIA RTX 4090",
            "platform": "Linux-5.15.0",
            "torch": "2.0.1+cu118",
        }
        expected_run = {
            "compilation": None,
            "start_time": run_start,
            "end_time": run_end,
            "meta": expected_meta,
            "mode": "leaderboard",
            "passed": True,
            "result": expected_result,
            "runner": "A100",
            "score": decimal.Decimal("5"),
            "secret": False,
            "system": expected_system,
        }

        stored = db.get_runs_generic(milestone_id=milestone_id)
        assert stored == [expected_run]

        # NOTE(review): the delete is keyed by the leaderboard id while the
        # query is keyed by the milestone id; confirm this matches the
        # signature of delete_milestone_runs.
        db.delete_milestone_runs(board_id)
        remaining = db.get_runs_generic(milestone_id=milestone_id)
        assert remaining == []
692+
693+
580694
def test_generate_stats(database, submit_leaderboard):
581695
with database as db:
582696
start = datetime.datetime.now(tz=datetime.timezone.utc)

unit-tests/test_task.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
Language,
1111
LeaderboardDefinition,
1212
LeaderboardTask,
13+
MilestoneData,
1314
PythonTaskData,
1415
RankCriterion,
1516
build_task_config,
@@ -57,7 +58,6 @@ def test_from_dict_python_task():
5758

5859

5960
def test_from_dict_cuda_task():
60-
"""Test creating LeaderboardTask from dict with CUDA config"""
6161
"""Test creating LeaderboardTask from dict with CUDA config"""
6262
data = {
6363
"lang": "cu",
@@ -93,7 +93,7 @@ def test_type_mismatch():
9393
)
9494

9595

96-
def test_to_dict(leaderboard_task):
96+
def test_to_dict(leaderboard_task: LeaderboardTask):
9797
"""Test converting LeaderboardTask to dict"""
9898
result = leaderboard_task.to_dict()
9999

@@ -114,15 +114,15 @@ def test_to_dict(leaderboard_task):
114114
]
115115

116116

117-
def test_serialization_roundtrip(leaderboard_task):
117+
def test_serialization_roundtrip(leaderboard_task: LeaderboardTask):
118118
"""Test to_str and from_str work together"""
119119
json_str = leaderboard_task.to_str()
120120
reconstructed = LeaderboardTask.from_str(json_str)
121121

122122
assert reconstructed == leaderboard_task
123123

124124

125-
def test_build_task_config_python(leaderboard_task):
125+
def test_build_task_config_python(leaderboard_task: LeaderboardTask):
126126
"""Test build_task_config with Python task and submission content."""
127127
submission_content = "print('Hello World')"
128128
arch = "sm_80"
@@ -235,6 +235,11 @@ def test_build_task_config_cuda():
235235
templates:
236236
Python: "template.py"
237237
CUDA: "template.cu"
238+
milestones:
239+
- name: "Milestone"
240+
source: "milestone.py"
241+
description: "This milestone is a test milestone"
242+
exclude_gpus: ["A100"]
238243
"""
239244

240245

@@ -245,13 +250,14 @@ def task_directory(tmp_path):
245250
Path.write_text(tmp_path / "kernel.py", "def kernel(): pass")
246251
Path.write_text(tmp_path / "template.py", "# Python template")
247252
Path.write_text(tmp_path / "template.cu", "// CUDA template")
253+
Path.write_text(tmp_path / "milestone.py", "def milestone(): pass")
248254

249255
# Create task.yml
250256
Path.write_text(tmp_path / "task.yml", TASK_YAML)
251257
return tmp_path
252258

253259

254-
def test_make_task_definition(task_directory):
260+
def test_make_task_definition(task_directory: Path):
255261
"""Test make_task_definition with a complete YAML structure"""
256262

257263
# Test the function
@@ -261,6 +267,14 @@ def test_make_task_definition(task_directory):
261267
assert isinstance(result, LeaderboardDefinition)
262268
assert result.description == "Test task description"
263269
assert result.templates == {"Python": "# Python template", "CUDA": "// CUDA template"}
270+
assert result.milestones == [
271+
MilestoneData(
272+
name="Milestone",
273+
code="def milestone(): pass",
274+
description="This milestone is a test milestone",
275+
exclude_gpus=["A100"],
276+
)
277+
]
264278

265279
# Verify the task
266280
task = result.task

0 commit comments

Comments
 (0)