1
1
import copy
2
2
import dataclasses
3
3
import datetime
4
+ import decimal
4
5
import subprocess
5
6
import time
7
+ from pathlib import Path
8
+ from unittest .mock import ANY
6
9
7
10
import pytest
8
11
from test_report import sample_compile_result , sample_run_result , sample_system_info
16
19
17
20
@pytest .fixture (scope = "module" )
18
21
def docker_compose ():
22
+ tgt_path = Path .cwd ()
23
+ if tgt_path .name == "unit-tests" :
24
+ tgt_path = tgt_path .parent
25
+
19
26
"""Start a test database and run migrations"""
20
- subprocess .check_call (["docker" , "compose" , "-f" , "docker-compose.test.yml" , "up" , "-d" ])
27
+ subprocess .check_call (
28
+ ["docker" , "compose" , "-f" , "docker-compose.test.yml" , "up" , "-d" ], cwd = tgt_path
29
+ )
21
30
22
31
try :
23
32
# Wait for migrations to finish
@@ -26,6 +35,7 @@ def docker_compose():
26
35
["docker" , "compose" , "-f" , "docker-compose.test.yml" , "ps" , "-q" , "migrate-test" ],
27
36
capture_output = True ,
28
37
text = True ,
38
+ cwd = tgt_path ,
29
39
)
30
40
31
41
if not result .stdout .strip (): # Container no longer exists
@@ -37,6 +47,7 @@ def docker_compose():
37
47
["docker" , "compose" , "-f" , "docker-compose.test.yml" , "logs" , "migrate-test" ],
38
48
capture_output = True ,
39
49
text = True ,
50
+ cwd = tgt_path ,
40
51
)
41
52
42
53
if "error" in logs .stdout .lower ():
@@ -52,7 +63,9 @@ def docker_compose():
52
63
ssl_mode = "disable" ,
53
64
)
54
65
finally :
55
- subprocess .run (["docker" , "compose" , "-f" , "docker-compose.test.yml" , "down" , "-v" ])
66
+ subprocess .run (
67
+ ["docker" , "compose" , "-f" , "docker-compose.test.yml" , "down" , "-v" ], cwd = tgt_path
68
+ )
56
69
57
70
58
71
def _nuke_contents (db ):
@@ -114,7 +127,7 @@ def _create_submission_run(
114
127
):
115
128
"""Creates a submission run with suitable default values"""
116
129
db .create_submission_run (
117
- submission ,
130
+ submission = submission ,
118
131
start = start or datetime .datetime .now (tz = datetime .timezone .utc ),
119
132
end = end
120
133
or (datetime .datetime .now (tz = datetime .timezone .utc ) + datetime .timedelta (seconds = 10 )),
@@ -268,9 +281,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard):
268
281
with database as db :
269
282
end_time = submit_time + datetime .timedelta (seconds = 10 )
270
283
db .create_submission_run (
271
- sub_id ,
272
- submit_time ,
273
- end_time ,
284
+ submission = sub_id ,
285
+ start = submit_time ,
286
+ end = end_time ,
274
287
mode = "test" ,
275
288
secret = False ,
276
289
runner = "A100" ,
@@ -282,9 +295,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard):
282
295
# run ends after the contest deadline; this is valid
283
296
end_time_2 = submit_time + datetime .timedelta (days = 1 , hours = 1 )
284
297
db .create_submission_run (
285
- sub_id ,
286
- submit_time ,
287
- end_time_2 ,
298
+ submission = sub_id ,
299
+ start = submit_time ,
300
+ end = end_time_2 ,
288
301
mode = "leaderboard" ,
289
302
secret = True ,
290
303
runner = "H100" ,
@@ -577,6 +590,107 @@ def test_leaderboard_update(database, task_directory):
577
590
}
578
591
579
592
593
+ def test_leaderboard_milestones (database , submit_leaderboard ):
594
+ with database as db :
595
+ lb_id = db .get_leaderboard_id ("submit-leaderboard" )
596
+ milestones = db .get_leaderboard_milestones (lb_id )
597
+ assert milestones == []
598
+
599
+ # at this point, created_at is filled in at the DB level,
600
+ # so we cannot set a fixed value for it in the tests below
601
+ db .create_milestone (lb_id , "Milestone" , "sample code" , "Test milestone" )
602
+ db .create_milestone (
603
+ lb_id , "Milestone2" , "other code" , "Second milestone" , exclude_gpus = ["T4" ]
604
+ )
605
+ milestones = db .get_leaderboard_milestones (lb_id )
606
+ assert milestones == [
607
+ {
608
+ "code" : "sample code" ,
609
+ "created_at" : ANY ,
610
+ "description" : "Test milestone" ,
611
+ "exclude_gpus" : ["" ],
612
+ "id" : 1 ,
613
+ "name" : "Milestone" ,
614
+ },
615
+ {
616
+ "code" : "other code" ,
617
+ "created_at" : ANY ,
618
+ "description" : "Second milestone" ,
619
+ "exclude_gpus" : ["T4" ],
620
+ "id" : 2 ,
621
+ "name" : "Milestone2" ,
622
+ },
623
+ ]
624
+
625
+ db .delete_milestones (lb_id )
626
+ milestones = db .get_leaderboard_milestones (lb_id )
627
+ assert milestones == []
628
+
629
+
630
+ def test_leaderboard_milestone_runs (database , submit_leaderboard ):
631
+ with database as db :
632
+ lb_id = db .get_leaderboard_id ("submit-leaderboard" )
633
+ ms_id = db .create_milestone (lb_id , "Milestone" , "sample code" , "Test milestone" )
634
+
635
+ start = datetime .datetime .now (tz = datetime .timezone .utc )
636
+ end = start + datetime .timedelta (seconds = 10 )
637
+ db .create_submission_run (
638
+ milestone = ms_id ,
639
+ start = start ,
640
+ end = end ,
641
+ mode = "leaderboard" ,
642
+ secret = False ,
643
+ runner = "A100" ,
644
+ score = 5 ,
645
+ compilation = None ,
646
+ result = sample_run_result (),
647
+ system = sample_system_info (),
648
+ )
649
+
650
+ runs = db .get_runs_generic (milestone_id = ms_id )
651
+ assert runs == [
652
+ {
653
+ "compilation" : None ,
654
+ "start_time" : start ,
655
+ "end_time" : end ,
656
+ "meta" : {
657
+ "command" : "./test" ,
658
+ "duration" : 1.5 ,
659
+ "exit_code" : 0 ,
660
+ "stderr" : "" ,
661
+ "stdout" : "All tests passed" ,
662
+ "success" : True ,
663
+ },
664
+ "mode" : "leaderboard" ,
665
+ "passed" : True ,
666
+ "result" : {
667
+ "test-count" : "3" ,
668
+ "test.0.message" : "Addition works correctly" ,
669
+ "test.0.spec" : "Test addition" ,
670
+ "test.0.status" : "pass" ,
671
+ "test.1.spec" : "Test multiplication" ,
672
+ "test.1.status" : "pass" ,
673
+ "test.2.error" : "Division by zero" ,
674
+ "test.2.spec" : "Test division" ,
675
+ "test.2.status" : "fail" ,
676
+ },
677
+ "runner" : "A100" ,
678
+ "score" : decimal .Decimal ("5" ),
679
+ "secret" : False ,
680
+ "system" : {
681
+ "cpu" : "Intel i9-12900K" ,
682
+ "gpu" : "NVIDIA RTX 4090" ,
683
+ "platform" : "Linux-5.15.0" ,
684
+ "torch" : "2.0.1+cu118" ,
685
+ },
686
+ }
687
+ ]
688
+
689
+ db .delete_milestone_runs (lb_id )
690
+ runs = db .get_runs_generic (milestone_id = ms_id )
691
+ assert runs == []
692
+
693
+
580
694
def test_generate_stats (database , submit_leaderboard ):
581
695
with database as db :
582
696
start = datetime .datetime .now (tz = datetime .timezone .utc )
0 commit comments