Skip to content

Commit 01477fd

Browse files
sordoniaxingdi-eric-yuanAlessandro Sordonimatheper
authored
Bash tool (#209)
* add back the message and LLM response to log * accuracy logging * return None on utf8 + uniform colors + print tool calls * grep tool * black * isort * bash tool * test bash * add test, improve grep tool (#208) * isort * black * black * remove mention of apt * bash grep * remove two tests looking at ignore * For now let's force to use DockerTerminal for bash and grep tools. * add a FORCE_DOCKER_TERMINAL env var * add config * skip binary files --------- Co-authored-by: Xingdi (Eric) Yuan <[email protected]> Co-authored-by: Alessandro Sordoni <[email protected]> Co-authored-by: Matheus Pereira <[email protected]>
1 parent c3cbbbb commit 01477fd

File tree

11 files changed

+977
-1
lines changed

11 files changed

+977
-1
lines changed

.github/workflows/tests.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,19 @@ jobs:
4040
tests/gym/envs/test_swe_*.py
4141
- name: Test - PR - Fast
4242
if: github.event_name == 'pull_request' && steps.changed-files-specific.outputs.any_changed != 'true'
43+
env:
44+
FORCE_DOCKER_TERMINAL: false
4345
run: |
4446
DEBUG_GYM_DEBUG=1 pytest -vv -n 16 -k "not test_swe_bench and not test_swe_smith" --cov=debug_gym --cov-report=term-missing --cov-fail-under=80 --timeout=600
4547
- name: Test - PR - Slow
4648
if: github.event_name == 'pull_request' && steps.changed-files-specific.outputs.any_changed == 'true'
49+
env:
50+
FORCE_DOCKER_TERMINAL: false
4751
run: |
4852
DEBUG_GYM_DEBUG=1 pytest -vv -n 16 --cov=debug_gym --cov-report=term-missing --cov-fail-under=85 --timeout=600
4953
- name: Test - main
5054
if: github.event_name != 'pull_request'
55+
env:
56+
FORCE_DOCKER_TERMINAL: false
5157
run: |
5258
DEBUG_GYM_DEBUG=1 pytest -vv -n 16 --cov=debug_gym --cov-report=term-missing --cov-fail-under=85 --timeout=600

debug_gym/gym/tools/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
from debug_gym.gym.tools.bash import BashTool
12
from debug_gym.gym.tools.eval import EvalTool
3+
from debug_gym.gym.tools.grep import GrepTool
24
from debug_gym.gym.tools.listdir import ListdirTool
35
from debug_gym.gym.tools.pdb import PDBTool
46
from debug_gym.gym.tools.rewrite import RewriteTool

debug_gym/gym/tools/bash.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from debug_gym.gym.entities import Observation
2+
from debug_gym.gym.tools.tool import EnvironmentTool
3+
from debug_gym.gym.tools.toolbox import Toolbox
4+
5+
6+
# Tool that lets an agent run arbitrary shell commands through the
# environment's terminal and receive the textual result as an Observation.
@Toolbox.register()
class BashTool(EnvironmentTool):
    name: str = "bash"
    examples = [
        """bash(command="ls -la") to list all files and directories with detailed information.""",
        """bash(command="grep -r 'function_name' .") to search for 'function_name' in all files recursively.""",
        """bash(command="find . -name '*.py' | head -10") to find Python files in the current directory.""",
        """bash(command="cat file.txt | head -20") to show the first 20 lines of a file.""",
        """bash(command="sed -n 10,25p path/to/file") to show lines 10 to 25 of a file at relative path.""",
        """bash(command="pip list") to show installed Python packages.""",
    ]
    description = (
        "Run commands in a bash shell. "
        "You have access to common linux and python packages via pip. "
        "State is persistent across command calls within the same session. "
        "\nExamples (for demonstration purposes only, you need to adjust the tool calling format according to your specific syntax):\n"
        + "\n".join(examples)
    )
    arguments = {
        "command": {
            "type": ["string"],
            "description": "The bash command to execute. The command will be run in the current working directory of the environment.",
        },
    }

    def use(self, environment, command: str) -> Observation:
        """Run ``command`` in the environment's terminal.

        Args:
            environment: Object exposing a ``terminal`` whose ``run(command,
                timeout=...)`` returns a ``(success, output)`` pair.
            command: Shell command line to execute.

        Returns:
            An Observation named after this tool. Its text is the command
            output, a placeholder when the output is blank, or an error
            description when the command fails or an exception is raised.
        """
        try:
            # Imported lazily, inside the guarded region, exactly like the
            # terminal call: an import failure is reported as an Observation.
            import os

            from debug_gym.gym.terminal import DockerTerminal

            # FORCE_DOCKER_TERMINAL defaults to "true"; any other value
            # (case-insensitive) disables the Docker-only restriction.
            docker_only = os.getenv("FORCE_DOCKER_TERMINAL", "true").lower() == "true"
            if docker_only and not isinstance(environment.terminal, DockerTerminal):
                return Observation(
                    self.name,
                    "Error: bash tool requires a Docker terminal. Current terminal type is not supported.",
                )

            # 30 second timeout so a hanging command cannot block the agent.
            ok, stdout = environment.terminal.run(command, timeout=30)

            if not ok:
                message = f"Command failed with output:\n{stdout}"
            elif stdout.strip():
                message = stdout
            else:
                message = "Command executed successfully (no output)"
        except Exception as exc:
            # Tool boundary: always answer with an Observation, never raise.
            message = f"Error executing command: {str(exc)}"

        return Observation(self.name, message)

debug_gym/gym/tools/grep.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import shlex
2+
3+
from debug_gym.gym.entities import Observation
4+
from debug_gym.gym.tools.tool import EnvironmentTool
5+
from debug_gym.gym.tools.toolbox import Toolbox
6+
7+
8+
# Tool that searches the repository by running ``grep`` through the
# environment's terminal and reformatting the matches grouped by file.
@Toolbox.register()
class GrepTool(EnvironmentTool):
    name: str = "grep"

    examples = [
        """grep(pattern="function", path=None) to search for the word "function" in all files in the repository.""",
        """grep(pattern="class.*Test", path="tests/") to search for lines matching the regex pattern "class.*Test" in all files under the 'tests/' directory.""",
        """grep(pattern="import numpy", path="src/main.py") to search for "import numpy" in the specific file 'src/main.py'.""",
        """grep(pattern="TODO") to search for "TODO".""",
        """grep(pattern="bug", max_results=10) to search for "bug" and limit results to 10 matches.""",
    ]
    description = (
        "Search for a pattern in files within the repository. Can search in specific files, directories, or the entire repository. "
        "Supports both literal string matching and regular expressions."
        + "\nExamples (for demonstration purposes only, you need to adjust the tool calling format according to your specific syntax):\n"
        + "\n".join(examples)
    )
    arguments = {
        "pattern": {
            "type": ["string"],
            "description": "The pattern to search for. Can be a literal string or a regular expression (if regex=True).",
        },
        "path": {
            "type": ["string", "null"],
            # The path is shell-quoted before use, so wildcards are never
            # expanded; describe it as a plain file or directory.
            "description": "Optional file or directory to search in (directories are searched recursively). If None, searches the entire repository. Path should be relative to the repository root.",
        },
        "max_results": {
            "type": ["number", "null"],
            "description": "Maximum number of matching lines to return. If None, returns 100 matches.",
        },
    }

    # Limit applied when the caller passes max_results=None.
    _DEFAULT_MAX_RESULTS = 100
    # Matched lines longer than this are truncated and suffixed with "...".
    _MAX_LINE_LENGTH = 300
    # Seconds allowed for the grep pipeline before the terminal gives up.
    _TIMEOUT = 30

    def use(
        self,
        environment,
        pattern: str,
        path: str = None,
        regex: bool = True,
        case_sensitive: bool = True,
        line_numbers: bool = True,
        max_results: int = 100,
    ) -> Observation:
        """Search for ``pattern`` and return the matches grouped by file.

        Args:
            environment: Object exposing a ``terminal`` whose ``run(command,
                timeout=...)`` returns a ``(success, output)`` pair.
            pattern: Text or extended regular expression to look for.
            path: File or directory to search; the current directory when
                empty or None.
            regex: Treat ``pattern`` as an extended regex (``-E``) when true,
                as a literal string (``-F``) otherwise.
            case_sensitive: Add ``-i`` when false.
            line_numbers: Prefix each reported line with its line number.
            max_results: Maximum number of matching lines. None means 100;
                zero or a negative value disables the limit.

        Returns:
            An Observation named after this tool holding the formatted
            matches, a "no matches" notice, or an error description.
        """
        if not pattern:
            return Observation(self.name, "Pattern cannot be empty.")

        # The schema allows null and any JSON number, so normalise to an int
        # before it reaches the shell command or the comparisons below.
        try:
            limit = (
                self._DEFAULT_MAX_RESULTS if max_results is None else int(max_results)
            )
        except (TypeError, ValueError):
            return Observation(self.name, "max_results must be an integer or None.")

        command = self._build_command(pattern, path, regex, case_sensitive, limit)

        try:
            # Imported lazily, inside the guarded region, so an import
            # failure is reported as an Observation rather than raised.
            import os

            from debug_gym.gym.terminal import DockerTerminal

            # FORCE_DOCKER_TERMINAL defaults to "true"; any other value
            # (case-insensitive) disables the Docker-only restriction.
            require_docker = (
                os.getenv("FORCE_DOCKER_TERMINAL", "true").lower() == "true"
            )
            if require_docker and not isinstance(environment.terminal, DockerTerminal):
                return Observation(
                    self.name,
                    "Error: grep tool requires a Docker terminal. Current terminal type is not supported.",
                )

            success, output = environment.terminal.run(command, timeout=self._TIMEOUT)
            if not success:
                return Observation(self.name, f"Grep command failed: {output}")

            stripped = output.strip()
            if not stripped:
                return Observation(self.name, self._no_match_message(pattern, path))

            lines = stripped.split("\n")
            if lines[0].startswith("grep: "):
                # grep's own diagnostics are prefixed with "grep: ".
                return Observation(
                    self.name, f"Grep error: {lines[0][len('grep: '):]}"
                )

            return Observation(
                self.name, self._format_matches(lines, line_numbers, limit)
            )
        except Exception as e:
            # Tool boundary: always answer with an Observation, never raise.
            return Observation(self.name, f"Error executing grep: {str(e)}")

    @staticmethod
    def _build_command(pattern, path, regex, case_sensitive, limit):
        """Return the shell pipeline that performs the search."""
        # -n line numbers, -r recursive, -H always print the file name,
        # -I skip binary files.
        options = ["-n", "-r", "-H", "-I"]
        # Exactly one matcher: extended regex or fixed string.
        options.append("-E" if regex else "-F")
        if not case_sensitive:
            options.append("-i")

        target = shlex.quote(path) if path else "."
        # "-e" keeps a pattern starting with "-" from being parsed as an
        # option; "--" does the same for the path operand.
        command = " ".join(
            ["grep", *options, "-e", shlex.quote(pattern), "--", target]
        )

        # Drop matches from common non-source directories.
        command += (
            " | grep -v '/.git/' | grep -v '__pycache__' | grep -v '/node_modules/'"
        )
        if limit > 0:
            command += f" | head -n {limit}"
        return command

    @staticmethod
    def _no_match_message(pattern, path):
        """Return the notice used when the search produced no output."""
        search_scope = f"in {path}" if path else "in repository"
        return f"No matches found for pattern '{pattern}' {search_scope}."

    def _format_matches(self, lines, line_numbers, limit):
        """Group ``file:line:content`` grep output lines under file headers."""
        if limit > 0 and len(lines) >= limit:
            header = f"Showing first {len(lines)} matches (search limit reached):"
        else:
            header = f"Found {len(lines)} matches:"
        formatted = [header, ""]

        current_file = None
        for line in lines:
            parts = line.split(":", 2)
            if len(parts) < 3:
                # Not in file:line:content form; pass it through untouched.
                formatted.append(line)
                continue

            file_path, line_num, content = parts
            if file_path != current_file:
                if current_file is not None:
                    formatted.append("")  # Empty line between files
                formatted.append(f"=== {file_path} ===")
                current_file = file_path

            # Only mark a line as truncated when text was actually cut.
            if len(content) > self._MAX_LINE_LENGTH:
                content = content[: self._MAX_LINE_LENGTH] + "..."

            if line_numbers:
                formatted.append(f"{line_num:>4}: {content}")
            else:
                formatted.append(content)

        return "\n".join(formatted)

scripts/config.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,7 @@ debug_agent:
4646
debug_5_agent:
4747
n_rewrites_before_pdb: 5
4848
tools: ["pdb", "view", "rewrite", "eval"]
49+
50+
grep_agent:
51+
agent_type: "rewrite_agent"
52+
tools: ["grep", "view", "rewrite", "eval"]

scripts/config_aider.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,8 @@ debug_agent:
4444

4545
debug_5_agent:
4646
n_rewrites_before_pdb: 5
47-
tools: ["pdb", "view", "rewrite", "eval"]
47+
tools: ["pdb", "view", "rewrite", "eval"]
48+
49+
grep_agent:
50+
agent_type: "rewrite_agent"
51+
tools: ["grep", "view", "rewrite", "eval"]

scripts/config_mini_nightmare.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,7 @@ debug_agent:
4545
debug_5_agent:
4646
n_rewrites_before_pdb: 5
4747
tools: ["pdb", "view", "rewrite", "eval"]
48+
49+
grep_agent:
50+
agent_type: "rewrite_agent"
51+
tools: ["grep", "view", "rewrite", "eval"]

scripts/config_swebench.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,7 @@ debug_5_agent:
4444
solution_agent:
4545
llm_name: "human" # No need for an LLM.
4646
tools: ["eval", "pdb"]
47+
48+
grep_agent:
49+
agent_type: "rewrite_agent"
50+
tools: ["grep", "view", "rewrite", "listdir", "eval"]

scripts/config_swesmith.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,7 @@ debug_5_agent:
4545
solution_agent:
4646
llm_name: "human" # No need for an LLM.
4747
tools: ["eval", "pdb"]
48+
49+
grep_agent:
50+
agent_type: "rewrite_agent"
51+
tools: ["grep", "view", "rewrite", "listdir", "eval"]

0 commit comments

Comments
 (0)