|
| 1 | +import shlex |
| 2 | + |
| 3 | +from debug_gym.gym.entities import Observation |
| 4 | +from debug_gym.gym.tools.tool import EnvironmentTool |
| 5 | +from debug_gym.gym.tools.toolbox import Toolbox |
| 6 | + |
| 7 | + |
@Toolbox.register()
class GrepTool(EnvironmentTool):
    """Search repository files by running ``grep`` in the environment's terminal.

    The tool builds a single shell command (``grep ... | head -n N``), runs it
    through ``environment.terminal.run`` and reformats the
    ``file:line:content`` output into a per-file grouped listing.
    """

    name: str = "grep"

    # Limit applied when ``max_results`` is None, non-numeric or not positive.
    _DEFAULT_MAX_RESULTS = 100
    # Matched lines longer than this are cut and suffixed with "...".
    _MAX_LINE_LENGTH = 300
    # Directories pruned by grep itself while recursing.
    _EXCLUDED_DIRS = (".git", "__pycache__", "node_modules")

    examples = [
        """grep(pattern="function", path=None) to search for the word "function" in all files in the repository.""",
        """grep(pattern="class.*Test", path="tests/") to search for lines matching the regex pattern "class.*Test" in all files under the 'tests/' directory.""",
        """grep(pattern="import numpy", path="src/main.py") to search for "import numpy" in the specific file 'src/main.py'.""",
        """grep(pattern="TODO") to search for "TODO".""",
        """grep(pattern="bug", max_results=10) to search for "bug" and limit results to 10 matches.""",
    ]
    description = (
        "Search for a pattern in files within the repository. Can search in specific files, directories, or the entire repository. "
        "Supports both literal string matching and regular expressions."
        + "\nExamples (for demonstration purposes only, you need to adjust the tool calling format according to your specific syntax):\n"
        + "\n".join(examples)
    )
    arguments = {
        "pattern": {
            "type": ["string"],
            "description": "The pattern to search for. Can be a literal string or a regular expression (if regex=True).",
        },
        # NOTE(review): the path is shell-quoted before it reaches grep, so glob
        # characters are passed literally and are NOT expanded by the shell.
        # The "glob pattern" wording below should be confirmed or reworded.
        "path": {
            "type": ["string", "null"],
            "description": "Optional glob pattern to search in. If None, searches the entire repository. Path should be relative to the repository root.",
        },
        "max_results": {
            "type": ["number", "null"],
            "description": "Maximum number of matching lines to return. If None, returns 100 matches.",
        },
    }

    def use(
        self,
        environment,
        pattern: str,
        path: str = None,
        regex: bool = True,
        case_sensitive: bool = True,
        line_numbers: bool = True,
        max_results: int = 100,
    ) -> Observation:
        """Run grep in the environment's terminal and return formatted matches.

        Args:
            environment: Object exposing ``terminal.run(command, timeout=...)``
                which returns a ``(success, output)`` pair.
            pattern: Pattern to search for; an empty pattern is rejected.
            path: File or directory to search, or None for the current
                directory (``.``).
            regex: If True the pattern is an extended regex (``-E``),
                otherwise a fixed string (``-F``).
            case_sensitive: If False, ``-i`` is added.
            line_numbers: If True, each match is prefixed with its line number.
            max_results: Maximum number of matching lines. None, non-numeric
                and non-positive values fall back to 100.

        Returns:
            An ``Observation`` carrying the formatted matches, a "no matches"
            notice, or an error description. Exceptions are reported in the
            observation rather than raised.
        """
        if not pattern:
            return Observation(self.name, "Pattern cannot be empty.")

        limit = self._normalize_max_results(max_results)
        command = self._build_command(pattern, path, regex, case_sensitive, limit)

        # This is the tool boundary: any failure is reported to the caller as
        # an observation instead of propagating.
        try:
            # Assert that the terminal is a Docker terminal (only in production)
            import os

            from debug_gym.gym.terminal import DockerTerminal

            # Skip Docker terminal check during testing or when explicitly disabled
            require_docker = (
                os.getenv("FORCE_DOCKER_TERMINAL", "true").lower() == "true"
            )
            if require_docker and not isinstance(environment.terminal, DockerTerminal):
                return Observation(
                    self.name,
                    "Error: grep tool requires a Docker terminal. Current terminal type is not supported.",
                )

            success, output = environment.terminal.run(command, timeout=30)
            if not success:
                return Observation(self.name, f"Grep command failed: {output}")

            stripped = output.strip()
            if not stripped:
                search_scope = f"in {path}" if path else "in repository"
                return Observation(
                    self.name,
                    f"No matches found for pattern '{pattern}' {search_scope}.",
                )

            lines = stripped.split("\n")
            if lines[0].startswith("grep: "):
                # grep's own diagnostics start with "grep: "; strip that prefix.
                return Observation(self.name, f"Grep error: {lines[0][6:]}")

            return Observation(
                self.name, self._format_matches(lines, limit, line_numbers)
            )
        except Exception as e:
            return Observation(self.name, f"Error executing grep: {str(e)}")

    @classmethod
    def _normalize_max_results(cls, max_results) -> int:
        """Coerce ``max_results`` to a positive int, defaulting when unusable."""
        try:
            limit = int(max_results)
        except (TypeError, ValueError, OverflowError):
            # None, non-numeric strings, NaN and infinity all land here.
            return cls._DEFAULT_MAX_RESULTS
        return limit if limit > 0 else cls._DEFAULT_MAX_RESULTS

    def _build_command(
        self,
        pattern: str,
        path,
        regex: bool,
        case_sensitive: bool,
        limit: int,
    ) -> str:
        """Assemble the shell command: grep with safe quoting, capped by head."""
        # -n line numbers, -r recursive, -H always print filename,
        # -I skip binary files.
        args = ["grep", "-n", "-r", "-H", "-I"]

        # Exactly one matcher: passing both -E and -F is rejected by some grep
        # versions as conflicting.
        args.append("-E" if regex else "-F")

        if not case_sensitive:
            args.append("-i")

        # Prune directories inside grep rather than post-filtering output
        # lines, which would also drop matches whose *content* mentions them.
        args.extend(
            f"--exclude-dir={shlex.quote(directory)}"
            for directory in self._EXCLUDED_DIRS
        )

        # "-e" keeps a pattern starting with "-" from being parsed as an
        # option; "--" does the same for the path operand.
        args.extend(["-e", shlex.quote(pattern)])
        args.extend(["--", shlex.quote(path) if path else "."])

        # head is always the last pipeline stage, so the result is always
        # capped at ``limit`` lines.
        return f"{' '.join(args)} | head -n {limit}"

    def _format_matches(self, lines, limit: int, line_numbers: bool) -> str:
        """Group ``file:line:content`` rows by file under a summary header."""
        if len(lines) >= limit:
            header = f"Showing first {len(lines)} matches (search limit reached):"
        else:
            header = f"Found {len(lines)} matches:"
        formatted = [header, ""]

        current_file = None
        for line in lines:
            # Expected grep output: filename:line_number:content
            parts = line.split(":", 2)
            if len(parts) < 3:
                # Unusual output (e.g. a diagnostic): pass through untouched.
                formatted.append(line)
                continue

            file_path, line_num, content = parts
            if file_path != current_file:
                if current_file is not None:
                    formatted.append("")  # Empty line between files
                formatted.append(f"=== {file_path} ===")
                current_file = file_path

            # Strictly longer than the cap: a line of exactly the cap length
            # is shown whole, without a misleading "..." suffix.
            if len(content) > self._MAX_LINE_LENGTH:
                content = content[: self._MAX_LINE_LENGTH] + "..."

            if line_numbers:
                formatted.append(f"{line_num:>4}: {content}")
            else:
                formatted.append(content)

        return "\n".join(formatted)
0 commit comments