Move guided agent into its own file.

MarcCote · MarcCote · commit e5abb91025c9 · 2025-06-11T15:07:38.000-04:00
diff --git a/.gitignore b/.gitignore
@@ -180,8 +180,6 @@ logs/
 
 /data
 
-.vscode/
-
 vscode/out
 vscode/node_modules
 vscode/package-lock.json
diff --git a/debug_gym/agents/__init__.py b/debug_gym/agents/__init__.py
@@ -1,2 +1,3 @@
 from debug_gym.agents.debug_agent import Debug_5_Agent, DebugAgent
+from debug_gym.agents.guided_agent import GuidedRewriteAgent
 from debug_gym.agents.rewrite_agent import RewriteAgent
diff --git a/debug_gym/agents/debug_agent.py b/debug_gym/agents/debug_agent.py
@@ -1,5 +1,4 @@
 from debug_gym.agents.base_agent import BaseAgent, register_agent
-from debug_gym.llms.base import LLM
 
 
 @register_agent
@@ -63,79 +62,3 @@ def run(self, task_name=None, debug=False):
                 break
 
         return info.done
-
-
-@register_agent
-class DebugHumanInTheLoop(DebugAgent):
-    name: str = "debug_human"
-
-    def run(self, task_name=None, debug=False):
-        # instantiate the human in the loop
-        self.human = LLM.instantiate(
-            llm_name="human",
-            llm_config_file_path=self.config.get("llm_config_file_path"),
-            logger=self.logger,
-        )
-
-        self.history.reset()
-        info = self.env.reset(options={"task_name": task_name})
-        # initial state does not have prompt and response
-        self.history.step(info, None)
-
-        if info.done is True:
-            # msg = "Environment started with entrypoint passing without errors."
-            return True
-
-        highscore = info.score
-
-        for step in self.logger.tqdm(range(self.config["max_steps"])):
-            highscore = max(highscore, info.score)
-            self.logger.info(
-                f"Score: {info.score}/{info.max_score} ({info.score/info.max_score:.1%}) [Best: {highscore}]"
-            )
-
-            prompt = self.build_prompt(info)
-
-            human_response = self.human(prompt, info.tools)
-
-            if debug:
-                breakpoint()
-
-            # make a copy of the env for the llm
-            self.cloned_env = self.env.clone()
-            # remove the pdb tool from the cloned env
-            if self.cloned_env.has_tool("pdb"):
-                self.cloned_env.remove_tool("pdb")
-            llm_info = self.cloned_env.reset(options={"task_name": task_name})
-            # replay the history up to the current step
-            for step in self.history.get_all():
-                if step.done:
-                    break
-                llm_info = self.cloned_env.step(step.action)
-
-            # step the environment with the human response
-            info = self.env.step(human_response.response)
-            # log the human response
-            self.history.step(info, human_response)
-
-            if info.done or info.rewrite_counter >= self.config["max_rewrite_steps"]:
-                self.logger.info(
-                    f"Score (human): {info.score}/{info.max_score} ({info.score/info.max_score:.1%})"
-                )
-                break
-
-            # call the llm with the cloned environment
-            prompt = self.build_prompt(llm_info)
-            llm_response = self.llm(prompt, llm_info.tools)
-            llm_info = self.cloned_env.step(llm_response.response)
-
-            if (
-                llm_info.done
-                or llm_info.rewrite_counter >= self.config["max_rewrite_steps"]
-            ):
-                self.logger.info(
-                    f"Score (llm): {llm_info.score}/{llm_info.max_score} ({llm_info.score/llm_info.max_score:.1%})"
-                )
-                break
-
-        return info.done
diff --git a/debug_gym/agents/guided_agent.py b/debug_gym/agents/guided_agent.py
@@ -0,0 +1,81 @@
+import logging
+
+from debug_gym.agents.base_agent import register_agent
+from debug_gym.agents.rewrite_agent import RewriteAgent
+from debug_gym.llms.base import LLM
+from debug_gym.logger import DebugGymLogger
+
+
+@register_agent
+class GuidedRewriteAgent(RewriteAgent):
+    name: str = "guided_agent"
+
+    def try_rewrite(self, task_name):
+        # make a copy of the env for the llm
+        cloned_env = self.env.clone()
+
+        # Only keep the rewrite tool in the cloned env
+        for tool in cloned_env.tools:
+            if tool.name != "rewrite":
+                cloned_env.remove_tool(tool.name)
+
+        # Reset the cloned environment and replay the history.
+        info = cloned_env.reset(options={"task_name": task_name})
+        # replay the history up to the current step
+        for step in self.history.get_all():
+            assert not step.done
+            info = cloned_env.step(step.action)
+
+        prompt = self.build_prompt(info)
+        response = self.llm(prompt, info.tools)
+        info = cloned_env.step(response.response)
+
+        return info.done
+
+    def run(self, task_name=None, debug=False):
+        self.llm.logger = DebugGymLogger(name="LLM", level=logging.ERROR)
+        self.human = LLM.instantiate(llm_name="human", logger=self.logger)
+
+        self.history.reset()
+        info = self.env.reset(options={"task_name": task_name})
+        # initial state does not have prompt and response
+        self.history.step(info, None)
+
+        if info.done is True:
+            # msg = "Environment started with entrypoint passing without errors."
+            return True
+
+        highscore = info.score
+
+        for step in self.logger.tqdm(range(self.config["max_steps"])):
+            highscore = max(highscore, info.score)
+            self.logger.info(
+                f"Score: {info.score}/{info.max_score} ({info.score/info.max_score:.1%}) [Best: {highscore}]"
+            )
+
+            llm_done = self.try_rewrite(task_name)
+            if llm_done:
+                self.logger.info(
+                    f"*** The rewrite-only agent with {self.llm.model_name} managed to solve the task with the current context. ***"
+                )
+                break
+
+            # If the LLM did not manage to solve the task, we continue with the guided approach.
+            prompt = self.build_prompt(info)
+            human_response = self.human(prompt, info.tools)
+
+            if debug:
+                breakpoint()
+
+            # step the environment with the human response
+            info = self.env.step(human_response.response)
+            # log the human response
+            self.history.step(info, human_response)
+
+            if info.done:
+                self.logger.info(
+                    "You managed to provide the patch that solves the task before the LLM. Congrats!"
+                )
+                break
+
+        return info.done
diff --git a/debug_gym/agents/utils.py b/debug_gym/agents/utils.py
@@ -159,7 +159,6 @@ def load_config():
         nargs="+",
         action="extend",
         metavar="my.setting=value",
-        action="extend",
         default=[],
         help="override params of the config file,"
         " e.g. -p 'rewrite_only.random_seed=123'",
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,4 +30,4 @@ dev = [
     "pytest-xdist",
     "pytest-timeout",
     "pytest-env",
-]
+]
diff --git a/scripts/config_mini_nightmare.yaml b/scripts/config_mini_nightmare.yaml
@@ -43,5 +43,5 @@ debug_5_agent:
     n_rewrites_before_pdb: 5
     tools: ["pdb", "view", "rewrite", "eval"]
 
-debug_human:
+guided_agent:
     tools: ["pdb", "view", "rewrite", "eval"]

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`from debug_gym.agents.debug_agent import Debug_5_Agent, DebugAgent`
	`2`	`+from debug_gym.agents.guided_agent import GuidedRewriteAgent`
`2`	`3`	`from debug_gym.agents.rewrite_agent import RewriteAgent`