Skip to content

Commit 0adba71

Browse files
MarcCote and matheper authored
Remove dependency on local filesystem. (#228)
* Pull changes from #107 and #212 * Add a workspace class. Refactoring + tests * Refactor SWE-Bench and make sure all 500 tasks can be solved. * Pass revision id to parent class * Tests should check for default SWE-bench/SWE-bench_Verified * Make sure we apply any changes needed for setting up the environment. * Rename RemoteWorkspace -> Workspace * Fix typos. * Use better delimiter for here-document + add explanation to workspace.write_file * Show with hidden files when listing file with workspace * Fix interact_with_pdb test * Raises if git diff fails --------- Co-authored-by: Matheus Pereira <[email protected]>
1 parent b1e796d commit 0adba71

35 files changed

+1459
-1249
lines changed

debug_gym/agents/base_agent.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,12 @@ def save_patch(self, task_name="custom"):
348348
)
349349

350350
def log(self, task_name="custom"):
351+
# Simple tools list.
352+
tools = [f"{tool.name}({tool.arguments})" for tool in self.env.tools]
351353
jsonl_output = {
352354
"problem": task_name,
353355
"config": self.config,
354-
"tools": self.llm.define_tools(self.env.tools),
356+
"tools": self.llm.define_tools(self.env.tools) if self.llm else tools,
355357
"uuid": self._uuid,
356358
"success": self.env.done,
357359
"log": [],

debug_gym/gym/envs/aider.py

Lines changed: 99 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,81 @@
11
import os
22
import subprocess
3+
import tempfile
4+
from pathlib import Path
35

46
import debug_gym.gym.utils as utils
57
from debug_gym.constants import DEBUG_GYM_CACHE_DIR
68
from debug_gym.gym.entities import EvalOutput
79
from debug_gym.gym.envs.env import RepoEnv
10+
from debug_gym.gym.terminal import DockerTerminal, Terminal
11+
12+
DOCKER_AIDER_IMAGE_NAME = "debug-gym:aider"
13+
14+
15+
def build_docker_image(logger):
16+
"""
17+
Build a Docker image for the Aider benchmark environment.
18+
"""
19+
# Check if Docker image is built.
20+
import docker
21+
22+
docker_client = docker.from_env(timeout=600)
23+
try:
24+
docker_client.images.get(DOCKER_AIDER_IMAGE_NAME)
25+
return
26+
except docker.errors.ImageNotFound:
27+
pass
28+
29+
logger.info(f"Docker image {DOCKER_AIDER_IMAGE_NAME} not found. Building it...")
30+
31+
# Starts from the official Python 3.12 slim image
32+
base_image = "python:3.12-slim"
33+
# Then install git and the required Python packages
34+
setup_commands = [
35+
"apt update",
36+
"apt install -y git tree",
37+
"pip install pytest",
38+
]
39+
# Create a temporary Dockerfile
40+
with tempfile.TemporaryDirectory() as build_dir:
41+
dockerfile_path = Path(build_dir) / "Dockerfile"
42+
with open(dockerfile_path, "w") as dockerfile:
43+
dockerfile.write(f"FROM {base_image}\n")
44+
for command in setup_commands:
45+
dockerfile.write(f"RUN {command}\n")
46+
47+
# Build the Docker image using docker client
48+
image, build_logs = docker_client.images.build(
49+
path=str(build_dir),
50+
dockerfile="Dockerfile",
51+
tag=DOCKER_AIDER_IMAGE_NAME,
52+
rm=True,
53+
)
54+
55+
logger.info(f"Docker image {DOCKER_AIDER_IMAGE_NAME} built successfully.")
856

957

1058
class AiderBenchmarkEnv(RepoEnv):
1159
REPO_URL = "https://github.com/exercism/python"
1260
REPO_PATH = DEBUG_GYM_CACHE_DIR / "exercism"
1361

62+
def __init__(
63+
self,
64+
entrypoint: str = "python -m pytest --tb=no -s .",
65+
terminal: Terminal | None = None,
66+
**kwargs,
67+
):
68+
69+
terminal = terminal or DockerTerminal(
70+
base_image=DOCKER_AIDER_IMAGE_NAME,
71+
logger=kwargs.get("logger"),
72+
)
73+
74+
super().__init__(entrypoint=entrypoint, terminal=terminal, **kwargs)
75+
1476
@property
1577
def instructions(self) -> str:
16-
return self.current_sample["instructions"]
17-
18-
def __init__(self, entrypoint: str = "python -m pytest -s .", **kwargs):
19-
super().__init__(entrypoint=entrypoint, **kwargs)
78+
return self.current_task["instructions"]
2079

2180
def calculate_max_score(self, eval_output: EvalOutput) -> int:
2281
return utils.extract_max_score_from_pytest_output(eval_output.output)
@@ -30,15 +89,43 @@ def eval(self, **kwargs) -> EvalOutput:
3089
self.last_eval = EvalOutput(success, output)
3190
return self.last_eval
3291

33-
def reset(self, *, options: dict = None):
34-
options = options or {}
35-
self.current_sample = self.dataset[options["task_name"]]
36-
directory = self.current_sample["base_directory"]
37-
self.setup_workspace(directory, entrypoint=self.entrypoint)
38-
infos = super().reset(options=options)
39-
return infos
92+
def setup_task(self, task_name: str, options: dict = None):
93+
if task_name not in self.dataset:
94+
raise ValueError(f"Task {task_name} not found in the dataset.")
95+
self.current_task = self.dataset[task_name]
96+
97+
def setup_workspace(self):
98+
self.workspace.reset()
99+
100+
self.logger.info("Copying files..")
101+
self.workspace.copy_content(
102+
src=self.current_task["codebase"], target=self.workspace.working_dir
103+
)
104+
self.workspace.setup_file_filters() # Use codebase's .debugignore and .debugreadonly.
105+
106+
self.set_entrypoints("python -m pytest --tb=no -s .")
107+
108+
def setup_terminal(self):
109+
self.logger.info(f"Configuring {self.terminal}...")
110+
111+
self.terminal.run("git init")
112+
self.terminal.run("git config user.name 'debug-gym'")
113+
self.terminal.run("git config user.email '<>'")
114+
115+
self.terminal.run(
116+
"git add *.py *.txt"
117+
) # Aider tasks only have Python and text files.
118+
self.terminal.run("git commit -am 'Init'")
119+
120+
self.terminal.run(
121+
"git add .debugignore .debugreadonly"
122+
) # Aider tasks come with those.
123+
self.terminal.run("git commit -am 'Add debug-gym ignore and read-only files'")
40124

41125
def load_dataset(self, problems: str | list[str] | None = None):
126+
if isinstance(self.terminal, DockerTerminal):
127+
build_docker_image(self.logger)
128+
42129
if not os.path.exists(self.REPO_PATH):
43130
subprocess.run(["git", "clone", self.REPO_URL, self.REPO_PATH], check=True)
44131

@@ -65,9 +152,6 @@ def load_dataset(self, problems: str | list[str] | None = None):
65152
".?*", # Ignore hidden files and directories but not current dir "."
66153
"__pycache__/",
67154
"*.pyc",
68-
# "*.md",
69-
# "log/",
70-
# "data/",
71155
],
72156
)
73157
# Add .debugreadonly so tests are readonly.
@@ -76,7 +160,7 @@ def load_dataset(self, problems: str | list[str] | None = None):
76160
)
77161

78162
dataset[task_name] = {
79-
"base_directory": directory,
163+
"codebase": directory,
80164
"instructions": instructions,
81165
"filename": task_name + ".py",
82166
}

0 commit comments

Comments
 (0)