Skip to content

Commit d4581e6

Browse files
remove unescape (#219)
1 parent 022e2d7 commit d4581e6

File tree

3 files changed

+0
-58
lines changed

3 files changed

+0
-58
lines changed

debug_gym/gym/tools/rewrite.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from debug_gym.gym.entities import Event, Observation
44
from debug_gym.gym.tools.tool import EnvironmentTool
55
from debug_gym.gym.tools.toolbox import Toolbox
6-
from debug_gym.gym.utils import clean_code
76

87

98
@Toolbox.register()
@@ -45,7 +44,6 @@ def _overwrite_file(self, environment, filepath: str, content: str):
4544

4645
def _rewrite_file(self, environment, file_path, start, end, new_code):
4746
original_content = environment.read_file(file_path)
48-
new_code = clean_code(new_code) # str
4947
new_code_lines = new_code.split("\n")
5048
new_code_length = len(new_code_lines)
5149

debug_gym/gym/utils.py

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,6 @@
66
from typing import Any, Callable
77

88

9-
def clean_code(code):
10-
assert isinstance(code, str)
11-
code_line = unescape(code).split("\n")
12-
# Remove trailing white spaces with rstrip.
13-
return "\n".join(line.rstrip() for line in code_line)
14-
15-
169
def filter_non_utf8(text):
1710
"""Filter out non-UTF-8 characters from text."""
1811
if not text:
@@ -22,20 +15,6 @@ def filter_non_utf8(text):
2215
return text
2316

2417

25-
def unescape(s):
26-
try:
27-
# First, try the normal unescape
28-
result = codecs.decode(s, "unicode_escape")
29-
# Test if it can be encoded to UTF-8 (which will happen during JSON encoding)
30-
result.encode("utf-8")
31-
return result
32-
except UnicodeEncodeError:
33-
# If it contains surrogate pairs that can't be encoded to UTF-8,
34-
# replace them with the Unicode replacement character (U+FFFD)
35-
result = codecs.decode(s, "unicode_escape")
36-
return result.encode("utf-8", errors="replace").decode("utf-8")
37-
38-
3918
def show_line_number(code_string, code_path=None, environment=None, start_index=1):
4019
# Show line number for each line
4120
# code_path is the path of the code file in view

tests/gym/test_utils.py

Lines changed: 0 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
from debug_gym.gym.envs.env import RepoEnv
66
from debug_gym.gym.utils import (
77
_walk,
8-
clean_code,
98
cleanup_pytest_output,
109
create_ignore_file,
1110
extract_max_score_from_pytest_output,
@@ -15,24 +14,9 @@
1514
is_subdirectory,
1615
make_file_matcher,
1716
show_line_number,
18-
unescape,
1917
)
2018

2119

22-
@pytest.mark.parametrize(
23-
"code, expected",
24-
[
25-
("def foo(): \n return 42 \n", "def foo():\n return 42\n"),
26-
("", ""),
27-
("def foo():\n return 42", "def foo():\n return 42"),
28-
("def foo(): \n return 42 \n\n", "def foo():\n return 42\n\n"),
29-
("def foo():\\n return 42\\n", "def foo():\n return 42\n"),
30-
],
31-
)
32-
def test_clean_code(code, expected):
33-
assert clean_code(code) == expected
34-
35-
3620
def test_show_line_number_empty_code_string():
3721
# code_string is empty
3822
with pytest.raises(
@@ -569,25 +553,6 @@ def test_walk():
569553
assert path_list == expected
570554

571555

572-
def test_unescape_surrogate_pairs():
573-
# Test with regular string
574-
regular_string = "This is a regular string with escapes \\n\\t"
575-
assert unescape(regular_string) == "This is a regular string with escapes \n\t"
576-
577-
# Test with surrogate pairs that would cause UTF-8 encoding issues
578-
surrogate_string = "Test with surrogate \\ud800\\udc00 pair"
579-
result = unescape(surrogate_string)
580-
581-
# Verify we can encode the result to UTF-8 without errors
582-
try:
583-
result.encode("utf-8")
584-
except UnicodeEncodeError:
585-
assert False, "Unescaped string still has invalid surrogate pairs"
586-
587-
# The result should replace the surrogate with a replacement character
588-
assert "Test with surrogate" in result
589-
590-
591556
def test_filter_non_utf8():
592557
"""Test the filter_non_utf8 function with various inputs."""
593558

0 commit comments

Comments
 (0)