1
1
import os
2
2
import subprocess
3
+ import tempfile
4
+ from pathlib import Path
3
5
4
6
import debug_gym .gym .utils as utils
5
7
from debug_gym .constants import DEBUG_GYM_CACHE_DIR
6
8
from debug_gym .gym .entities import EvalOutput
7
9
from debug_gym .gym .envs .env import RepoEnv
10
+ from debug_gym .gym .terminal import DockerTerminal , Terminal
11
+
12
+ DOCKER_AIDER_IMAGE_NAME = "debug-gym:aider"
13
+
14
+
15
def build_docker_image(logger):
    """Build the Docker image used by the Aider benchmark environment.

    Returns immediately when an image tagged ``DOCKER_AIDER_IMAGE_NAME`` is
    already available to the local Docker daemon. Otherwise a temporary
    Dockerfile is generated (``python:3.12-slim`` plus git, tree and pytest)
    and built through the Docker SDK.

    Args:
        logger: Object exposing ``info(message)``; used to report progress.
    """
    # Local import: the Docker SDK is only needed by this function.
    import docker

    docker_client = docker.from_env(timeout=600)
    try:
        docker_client.images.get(DOCKER_AIDER_IMAGE_NAME)
        return  # Image already built; nothing to do.
    except docker.errors.ImageNotFound:
        pass

    logger.info(f"Docker image {DOCKER_AIDER_IMAGE_NAME} not found. Building it...")

    # Starts from the official Python 3.12 slim image.
    base_image = "python:3.12-slim"
    # Then install git and the required Python packages. The package index
    # refresh and the install share a single RUN layer so that a cached,
    # outdated index is never paired with a fresh install step.
    setup_commands = [
        "apt-get update && apt-get install -y git tree",
        "pip install pytest",
    ]
    dockerfile_lines = [f"FROM {base_image}"]
    dockerfile_lines.extend(f"RUN {command}" for command in setup_commands)

    # The Dockerfile only has to exist for the duration of the build.
    with tempfile.TemporaryDirectory() as build_dir:
        dockerfile_path = Path(build_dir) / "Dockerfile"
        dockerfile_path.write_text("\n".join(dockerfile_lines) + "\n")

        # Build the Docker image using the docker client.
        docker_client.images.build(
            path=str(build_dir),
            dockerfile="Dockerfile",
            tag=DOCKER_AIDER_IMAGE_NAME,
            rm=True,
        )

    logger.info(f"Docker image {DOCKER_AIDER_IMAGE_NAME} built successfully.")
8
56
9
57
10
58
class AiderBenchmarkEnv (RepoEnv ):
11
59
REPO_URL = "https://github.com/exercism/python"
12
60
REPO_PATH = DEBUG_GYM_CACHE_DIR / "exercism"
13
61
62
def __init__(
    self,
    entrypoint: str = "python -m pytest --tb=no -s .",
    terminal: Terminal | None = None,
    **kwargs,
):
    """Create the environment, defaulting to a Docker-backed terminal.

    Args:
        entrypoint: Command forwarded to the parent constructor.
        terminal: Terminal to use. When ``None``, a ``DockerTerminal`` based
            on ``DOCKER_AIDER_IMAGE_NAME`` is created, sharing the ``logger``
            entry of ``kwargs`` if one was given.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    # Compare against None explicitly so that a caller-supplied terminal is
    # never replaced merely because it evaluates as falsy.
    if terminal is None:
        terminal = DockerTerminal(
            base_image=DOCKER_AIDER_IMAGE_NAME,
            logger=kwargs.get("logger"),
        )

    super().__init__(entrypoint=entrypoint, terminal=terminal, **kwargs)
75
+
14
76
@property
def instructions(self) -> str:
    """Return the ``"instructions"`` entry of the currently selected task."""
    task = self.current_task
    return task["instructions"]
20
79
21
80
def calculate_max_score(self, eval_output: EvalOutput) -> int:
    """Return what ``utils.extract_max_score_from_pytest_output`` derives from the evaluation output."""
    pytest_output = eval_output.output
    return utils.extract_max_score_from_pytest_output(pytest_output)
@@ -30,15 +89,43 @@ def eval(self, **kwargs) -> EvalOutput:
30
89
self .last_eval = EvalOutput (success , output )
31
90
return self .last_eval
32
91
33
- def reset (self , * , options : dict = None ):
34
- options = options or {}
35
- self .current_sample = self .dataset [options ["task_name" ]]
36
- directory = self .current_sample ["base_directory" ]
37
- self .setup_workspace (directory , entrypoint = self .entrypoint )
38
- infos = super ().reset (options = options )
39
- return infos
92
def setup_task(self, task_name: str, options: dict = None):
    """Select ``task_name`` from the dataset as the current task.

    Args:
        task_name: Key to look up in ``self.dataset``.
        options: Accepted for interface compatibility; not used here.

    Raises:
        ValueError: If ``task_name`` is not a key of ``self.dataset``.
    """
    if task_name in self.dataset:
        self.current_task = self.dataset[task_name]
        return
    raise ValueError(f"Task {task_name} not found in the dataset.")
96
+
97
def setup_workspace(self):
    """Reset the workspace and populate it with the current task's codebase.

    After copying, the workspace's file filters are set up and the pytest
    entrypoint is registered through ``set_entrypoints``.
    """
    workspace = self.workspace
    workspace.reset()

    self.logger.info("Copying files..")
    codebase = self.current_task["codebase"]
    workspace.copy_content(src=codebase, target=workspace.working_dir)

    # Use codebase's .debugignore and .debugreadonly.
    workspace.setup_file_filters()

    self.set_entrypoints("python -m pytest --tb=no -s .")
107
+
108
def setup_terminal(self):
    """Initialise a git repository in the terminal and commit the task files.

    Two commits are created: one for the Python and text files, and one for
    the ``.debugignore`` / ``.debugreadonly`` files.
    """
    self.logger.info(f"Configuring {self.terminal} ...")

    git_commands = (
        "git init",
        "git config user.name 'debug-gym'",
        "git config user.email '<>'",
        # Aider tasks only have Python and text files.
        "git add *.py *.txt",
        "git commit -am 'Init'",
        # Aider tasks come with those.
        "git add .debugignore .debugreadonly",
        "git commit -am 'Add debug-gym ignore and read-only files'",
    )
    # Commands run strictly in order, one terminal call each.
    for git_command in git_commands:
        self.terminal.run(git_command)
40
124
41
125
def load_dataset (self , problems : str | list [str ] | None = None ):
126
+ if isinstance (self .terminal , DockerTerminal ):
127
+ build_docker_image (self .logger )
128
+
42
129
if not os .path .exists (self .REPO_PATH ):
43
130
subprocess .run (["git" , "clone" , self .REPO_URL , self .REPO_PATH ], check = True )
44
131
@@ -65,9 +152,6 @@ def load_dataset(self, problems: str | list[str] | None = None):
65
152
".?*" , # Ignore hidden files and directories but not current dir "."
66
153
"__pycache__/" ,
67
154
"*.pyc" ,
68
- # "*.md",
69
- # "log/",
70
- # "data/",
71
155
],
72
156
)
73
157
# Add .debugreadonly so tests are readonly.
@@ -76,7 +160,7 @@ def load_dataset(self, problems: str | list[str] | None = None):
76
160
)
77
161
78
162
dataset [task_name ] = {
79
- "base_directory " : directory ,
163
+ "codebase " : directory ,
80
164
"instructions" : instructions ,
81
165
"filename" : task_name + ".py" ,
82
166
}
0 commit comments