Remove unescape and clean_code helpers (#219)

xingdi-eric-yuan · web-flow · commit d4581e6a4887 · 2025-08-13T21:55:47.000-04:00
diff --git a/debug_gym/gym/tools/rewrite.py b/debug_gym/gym/tools/rewrite.py
@@ -3,7 +3,6 @@
 from debug_gym.gym.entities import Event, Observation
 from debug_gym.gym.tools.tool import EnvironmentTool
 from debug_gym.gym.tools.toolbox import Toolbox
-from debug_gym.gym.utils import clean_code
 
 
 @Toolbox.register()
@@ -45,7 +44,6 @@ def _overwrite_file(self, environment, filepath: str, content: str):
 
     def _rewrite_file(self, environment, file_path, start, end, new_code):
         original_content = environment.read_file(file_path)
-        new_code = clean_code(new_code)  # str
         new_code_lines = new_code.split("\n")
         new_code_length = len(new_code_lines)
 
diff --git a/debug_gym/gym/utils.py b/debug_gym/gym/utils.py
@@ -6,13 +6,6 @@
 from typing import Any, Callable
 
 
-def clean_code(code):
-    assert isinstance(code, str)
-    code_line = unescape(code).split("\n")
-    # Remove trailing white spaces with rstrip.
-    return "\n".join(line.rstrip() for line in code_line)
-
-
 def filter_non_utf8(text):
     """Filter out non-UTF-8 characters from text."""
     if not text:
@@ -22,20 +15,6 @@ def filter_non_utf8(text):
     return text
 
 
-def unescape(s):
-    try:
-        # First, try the normal unescape
-        result = codecs.decode(s, "unicode_escape")
-        # Test if it can be encoded to UTF-8 (which will happen during JSON encoding)
-        result.encode("utf-8")
-        return result
-    except UnicodeEncodeError:
-        # If it contains surrogate pairs that can't be encoded to UTF-8,
-        # replace them with the Unicode replacement character (U+FFFD)
-        result = codecs.decode(s, "unicode_escape")
-        return result.encode("utf-8", errors="replace").decode("utf-8")
-
-
 def show_line_number(code_string, code_path=None, environment=None, start_index=1):
     # Show line number for each line
     # code_path is the path of the code file in view
diff --git a/tests/gym/test_utils.py b/tests/gym/test_utils.py
@@ -5,7 +5,6 @@
 from debug_gym.gym.envs.env import RepoEnv
 from debug_gym.gym.utils import (
     _walk,
-    clean_code,
     cleanup_pytest_output,
     create_ignore_file,
     extract_max_score_from_pytest_output,
@@ -15,24 +14,9 @@
     is_subdirectory,
     make_file_matcher,
     show_line_number,
-    unescape,
 )
 
 
-@pytest.mark.parametrize(
-    "code, expected",
-    [
-        ("def foo():    \n    return 42    \n", "def foo():\n    return 42\n"),
-        ("", ""),
-        ("def foo():\n    return 42", "def foo():\n    return 42"),
-        ("def foo():    \n    return 42    \n\n", "def foo():\n    return 42\n\n"),
-        ("def foo():\\n    return 42\\n", "def foo():\n    return 42\n"),
-    ],
-)
-def test_clean_code(code, expected):
-    assert clean_code(code) == expected
-
-
 def test_show_line_number_empty_code_string():
     # code_string is empty
     with pytest.raises(
@@ -569,25 +553,6 @@ def test_walk():
     assert path_list == expected
 
 
-def test_unescape_surrogate_pairs():
-    # Test with regular string
-    regular_string = "This is a regular string with escapes \\n\\t"
-    assert unescape(regular_string) == "This is a regular string with escapes \n\t"
-
-    # Test with surrogate pairs that would cause UTF-8 encoding issues
-    surrogate_string = "Test with surrogate \\ud800\\udc00 pair"
-    result = unescape(surrogate_string)
-
-    # Verify we can encode the result to UTF-8 without errors
-    try:
-        result.encode("utf-8")
-    except UnicodeEncodeError:
-        assert False, "Unescaped string still has invalid surrogate pairs"
-
-    # The result should replace the surrogate with a replacement character
-    assert "Test with surrogate" in result
-
-
 def test_filter_non_utf8():
     """Test the filter_non_utf8 function with various inputs."""