Skip to content

Commit aebb9db

Browse files
YakshithKsimonrosenbergopenhands-agent
authored
fix: sanitize @OpenHands only in GitHub comments (backend) (#1020)
Co-authored-by: simonrosenberg <[email protected]> Co-authored-by: openhands <[email protected]>
1 parent f70b945 commit aebb9db

File tree

6 files changed

+194
-4
lines changed

6 files changed

+194
-4
lines changed

.github/workflows/integration-runner.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,8 @@ jobs:
233233
- name: Create consolidated PR comment
234234
if: github.event_name == 'pull_request_target'
235235
run: |
236-
COMMENT_BODY=$(cat consolidated_report.md)
236+
# Sanitize @OpenHands mentions to prevent self-mention loops
237+
COMMENT_BODY=$(uv run python -c "from openhands.sdk.utils.github import sanitize_openhands_mentions; import sys; print(sanitize_openhands_mentions(sys.stdin.read()), end='')" < consolidated_report.md)
237238
# Use GitHub CLI to create comment with explicit PR number
238239
echo "$COMMENT_BODY" | gh pr comment ${{ github.event.pull_request.number }} --body-file -
239240
env:
@@ -243,8 +244,8 @@ jobs:
243244
if: github.event_name == 'schedule'
244245
id: read_report
245246
run: |
246-
# Read the report and set as output
247-
REPORT_CONTENT=$(cat consolidated_report.md)
247+
# Read and sanitize the report, then set as output
248+
REPORT_CONTENT=$(uv run python -c "from openhands.sdk.utils.github import sanitize_openhands_mentions; import sys; print(sanitize_openhands_mentions(sys.stdin.read()), end='')" < consolidated_report.md)
248249
echo "report<<EOF" >> $GITHUB_OUTPUT
249250
echo "$REPORT_CONTENT" >> $GITHUB_OUTPUT
250251
echo "EOF" >> $GITHUB_OUTPUT

.github/workflows/run-examples.yml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,12 @@ jobs:
106106
API_URL="https://api.github.com/repos/${REPO_OWNER}/${REPO_NAME}/issues/${PR_NUMBER}/comments"
107107
fi
108108
109+
# Function to sanitize @OpenHands mentions using the SDK utility
110+
sanitize_comment() {
111+
local text="$1"
112+
printf "%s" "$text" | uv run python -c "from openhands.sdk.utils.github import sanitize_openhands_mentions; import sys; print(sanitize_openhands_mentions(sys.stdin.read()), end='')"
113+
}
114+
109115
# Function to update PR comment
110116
update_comment() {
111117
# Skip if not a PR event
@@ -116,6 +122,9 @@ jobs:
116122
local comment_body="$1"
117123
local response
118124
125+
# Sanitize @OpenHands mentions before posting
126+
comment_body=$(sanitize_comment "$comment_body")
127+
119128
if [ -z "$COMMENT_ID" ]; then
120129
# Create new comment
121130
response=$(curl -s -X POST \
@@ -303,7 +312,8 @@ jobs:
303312
shell: bash
304313
run: |
305314
if [ -f examples_report.md ]; then
306-
REPORT_CONTENT=$(cat examples_report.md)
315+
# Sanitize @OpenHands mentions before posting
316+
REPORT_CONTENT=$(uv run python -c "from openhands.sdk.utils.github import sanitize_openhands_mentions; import sys; print(sanitize_openhands_mentions(sys.stdin.read()), end='')" < examples_report.md)
307317
echo "report<<EOF" >> $GITHUB_OUTPUT
308318
echo "$REPORT_CONTENT" >> $GITHUB_OUTPUT
309319
echo "EOF" >> $GITHUB_OUTPUT

examples/03_github_workflows/02_pr_review/agent_script.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
from openhands.sdk import LLM, Conversation, get_logger # noqa: E402
4040
from openhands.sdk.conversation import get_agent_final_response # noqa: E402
41+
from openhands.sdk.utils.github import sanitize_openhands_mentions # noqa: E402
4142
from openhands.tools.preset.default import get_default_agent # noqa: E402
4243

4344

@@ -51,6 +52,9 @@ def post_review_comment(review_content: str) -> None:
5152
Args:
5253
review_content: The review content to post
5354
"""
55+
# Sanitize @OpenHands mentions to prevent self-mention loops
56+
review_content = sanitize_openhands_mentions(review_content)
57+
5458
logger.info("Posting review comment to GitHub...")
5559
pr_number = os.getenv("PR_NUMBER")
5660
repo_name = os.getenv("REPO_NAME")

openhands-sdk/openhands/sdk/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
deprecated,
55
warn_deprecated,
66
)
7+
from .github import sanitize_openhands_mentions
78
from .truncate import (
89
DEFAULT_TEXT_CONTENT_LIMIT,
910
DEFAULT_TRUNCATE_NOTICE,
@@ -17,4 +18,5 @@
1718
"maybe_truncate",
1819
"deprecated",
1920
"warn_deprecated",
21+
"sanitize_openhands_mentions",
2022
]
openhands-sdk/openhands/sdk/utils/github.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""Utility functions for GitHub integrations."""
2+
3+
import re
4+
5+
6+
# Zero-width joiner character (U+200D).
# ZWJ is used instead of ZWSP (U+200B): it is invisible and joins the
# surrounding characters without introducing a space, so the sanitized text
# still reads as "@OpenHands" while no longer forming a mention token.
ZWJ = "\u200d"

# Matches "@OpenHands" (any casing) only where it can act as a mention.
#   (?<![A-Za-z0-9])  - the "@" must not be glued to a preceding ASCII letter
#                       or digit; this leaves e-mail addresses such as
#                       "bot@openhands.dev" untouched.
#   (OpenHands)       - captured so the replacement keeps the original casing.
#   (?![A-Za-z0-9])   - the name must end here, so longer handles such as
#                       "@OpenHandsTeam" are not rewritten. Unlike ``\b`` this
#                       still matches when "_" follows, which covers Markdown
#                       emphasis like "_@OpenHands_".
_OPENHANDS_MENTION = re.compile(
    r"(?<![A-Za-z0-9])@(OpenHands)(?![A-Za-z0-9])",
    re.IGNORECASE,
)


def sanitize_openhands_mentions(text: str) -> str:
    """Sanitize @OpenHands mentions in text to prevent self-mention loops.

    A zero-width joiner (ZWJ) is inserted right after the ``@`` of every
    standalone ``@OpenHands`` mention. The text looks the same to a reader
    but no longer contains the literal mention token. Matching is
    case-insensitive and the original casing of the name is preserved.
    The function is idempotent: already-sanitized mentions no longer match.

    An ``@openhands`` that directly follows an ASCII letter or digit (as in
    an e-mail address) is left unchanged, as is a longer handle that merely
    starts with "OpenHands" followed by more letters or digits.

    Args:
        text: The text to sanitize.

    Returns:
        The text with each mention rewritten, i.e. "@OpenHands" becomes
        "@" + ZWJ + "OpenHands".

    Examples:
        >>> sanitize_openhands_mentions("Thanks @OpenHands for the help!")
        'Thanks @\\u200dOpenHands for the help!'
        >>> sanitize_openhands_mentions("Check @openhands and @OPENHANDS")
        'Check @\\u200dopenhands and @\\u200dOPENHANDS'
        >>> sanitize_openhands_mentions("Mail bot@openhands.dev")
        'Mail bot@openhands.dev'
        >>> sanitize_openhands_mentions("No mention here")
        'No mention here'
    """
    # "\\1" re-emits the captured name exactly as it was written.
    return _OPENHANDS_MENTION.sub(f"@{ZWJ}\\1", text)

tests/sdk/utils/test_github.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""Tests for GitHub utility functions."""
2+
3+
from openhands.sdk.utils.github import ZWJ, sanitize_openhands_mentions
4+
5+
6+
def test_sanitize_basic_mention():
    """A plain @OpenHands mention gets a ZWJ inserted right after the "@"."""
    sanitized = sanitize_openhands_mentions("Thanks @OpenHands for the help!")
    assert sanitized == f"Thanks @{ZWJ}OpenHands for the help!"
11+
12+
13+
def test_sanitize_case_insensitive():
    """Mentions are sanitized whatever their casing, and the casing is kept."""
    # Maps each raw input to the sanitized text it must produce.
    expectations = {
        "Check @OpenHands here": f"Check @{ZWJ}OpenHands here",
        "Check @openhands here": f"Check @{ZWJ}openhands here",
        "Check @OPENHANDS here": f"Check @{ZWJ}OPENHANDS here",
        "Check @oPeNhAnDs here": f"Check @{ZWJ}oPeNhAnDs here",
    }
    for raw, wanted in expectations.items():
        assert sanitize_openhands_mentions(raw) == wanted
23+
24+
25+
def test_sanitize_multiple_mentions():
    """Every mention in a string is rewritten, not only the first one."""
    sanitized = sanitize_openhands_mentions(
        "Both @OpenHands and @openhands should be sanitized"
    )
    assert sanitized == (
        f"Both @{ZWJ}OpenHands and @{ZWJ}openhands should be sanitized"
    )
30+
31+
32+
def test_sanitize_with_punctuation():
    """Punctuation directly around a mention does not stop sanitization."""
    # Maps each raw input to the sanitized text it must produce.
    expectations = {
        "Thanks @OpenHands!": f"Thanks @{ZWJ}OpenHands!",
        "Hello @OpenHands.": f"Hello @{ZWJ}OpenHands.",
        "See @OpenHands,": f"See @{ZWJ}OpenHands,",
        "By @OpenHands:": f"By @{ZWJ}OpenHands:",
        "From @OpenHands;": f"From @{ZWJ}OpenHands;",
        "Hi @OpenHands?": f"Hi @{ZWJ}OpenHands?",
        "Use @OpenHands)": f"Use @{ZWJ}OpenHands)",
        "Try (@OpenHands)": f"Try (@{ZWJ}OpenHands)",
    }
    for raw, wanted in expectations.items():
        assert sanitize_openhands_mentions(raw) == wanted
46+
47+
48+
def test_no_sanitize_partial_words():
    """Test that partial word matches are NOT sanitized.

    Covers two groups of inputs:
    - words that merely contain "OpenHands" but have no "@" prefix, and
    - "@" handles whose name only *starts* with "OpenHands"; these are the
      inputs that actually exercise the end-of-name boundary check.
    """
    test_cases = [
        # No "@" at all: nothing to rewrite.
        "OpenHandsTeam",
        "MyOpenHands",
        "OpenHandsBot",
        "#OpenHands",
        # Longer handles that begin with "OpenHands" are different names.
        "@OpenHandsTeam",
        "@OpenHandsBot",
        "@OpenHands123",
    ]
    for text in test_cases:
        # Neither group is an @OpenHands mention, so the text must be unchanged.
        assert sanitize_openhands_mentions(text) == text
59+
60+
61+
def test_no_op_cases():
    """Inputs without an @OpenHands mention come back unchanged."""
    untouched_inputs = (
        "",
        "No mentions here",
        "Just some text",
        "@GitHub",
        "@Other",
        "OpenHands without @",
    )
    for sample in untouched_inputs:
        assert sanitize_openhands_mentions(sample) == sample
73+
74+
75+
def test_sanitize_at_line_boundaries():
    """Mentions at the very start or very end of the text are sanitized."""
    # Maps each raw input to the sanitized text it must produce.
    expectations = {
        "@OpenHands at start": f"@{ZWJ}OpenHands at start",
        "at end @OpenHands": f"at end @{ZWJ}OpenHands",
        "@OpenHands": f"@{ZWJ}OpenHands",
    }
    for raw, wanted in expectations.items():
        assert sanitize_openhands_mentions(raw) == wanted
84+
85+
86+
def test_sanitize_multiline_text():
    """Mentions on different lines of one string are all sanitized."""
    # The continuation lines start at column 0 so the literals carry no
    # extra leading whitespace.
    raw = """Hello @OpenHands!

This is a test with @openhands mentioned.

Thanks @OPENHANDS for everything!"""

    wanted = f"""Hello @{ZWJ}OpenHands!

This is a test with @{ZWJ}openhands mentioned.

Thanks @{ZWJ}OPENHANDS for everything!"""

    sanitized = sanitize_openhands_mentions(raw)
    assert sanitized == wanted
101+
102+
103+
def test_sanitize_with_urls():
    """URLs containing "OpenHands" stay intact; real mentions nearby do not."""
    url_only = "Visit https://github.com/OpenHands"
    mention_and_url = "See @OpenHands at https://github.com/OpenHands"

    scenarios = (
        # A bare URL has no "@", so it must pass through unchanged.
        (url_only, url_only),
        # The mention is rewritten while the URL after it is left alone.
        (mention_and_url, f"See @{ZWJ}OpenHands at https://github.com/OpenHands"),
    )
    for raw, wanted in scenarios:
        assert sanitize_openhands_mentions(raw) == wanted
116+
117+
118+
def test_sanitize_preserves_whitespace():
    """Whitespace around mentions, including newlines, is left as-is."""
    sanitized = sanitize_openhands_mentions(" @OpenHands \n @openhands ")
    assert sanitized == f" @{ZWJ}OpenHands \n @{ZWJ}openhands "
123+
124+
125+
def test_zwj_constant():
    """ZWJ is exactly the single code point U+200D."""
    assert ZWJ == "\u200d"
    # Redundant with the equality above, but pins length and code point
    # explicitly so a failure message points at the exact mismatch.
    zwj_length = len(ZWJ)
    assert zwj_length == 1
    zwj_code_point = ord(ZWJ)
    assert zwj_code_point == 0x200D

0 commit comments

Comments
 (0)