Skip to content

Commit 4e65944

Browse files
committed
Fixed lexical slur bug
1 parent 65a5c23 commit 4e65944

File tree

1 file changed

+14
-16
lines changed

1 file changed

+14
-16
lines changed

backend/app/core/validators/lexical_slur.py

Lines changed: 14 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -38,21 +38,25 @@ def __init__(
3838
super().__init__(on_fail=on_fail, search_words=self.slur_list)
3939

4040
def _validate(self, value: str, metadata: dict = None) -> ValidationResult:
41+
translator = str.maketrans('', '', string.punctuation)
42+
4143
value = self.remove_emojis(value)
42-
value = self.remove_nos(value)
4344
value = self.clean_text(value)
4445
words = value.split()
4546
detected_slurs = []
4647

47-
for slur in self.slur_list:
48-
if slur in words:
49-
if slur not in detected_slurs:
50-
detected_slurs.append(slur)
48+
clean_words = [
49+
w.translate(translator).lower()
50+
for w in words
51+
]
5152

52-
if len(detected_slurs) > 0:
53-
for word in words:
54-
if word in detected_slurs:
55-
value = re.sub(rf'\b{re.escape(word)}\b', "[REDACTED_SLUR]", value, flags=re.IGNORECASE)
53+
detected_slurs = list(
54+
set(clean_words) & set(self.slur_list)
55+
)
56+
57+
for slur in detected_slurs:
58+
pattern = rf'\b{re.escape(slur)}\b'
59+
value = re.sub(pattern, "[REDACTED_SLUR]", value, flags=re.IGNORECASE)
5660

5761
if len(detected_slurs) > 0:
5862
return FailResult(
@@ -74,15 +78,9 @@ def remove_emojis(self, text):
7478

7579
def clean_text(self, text):
7680
text = self.normalize_text(text)
77-
translator = str.maketrans('', '', string.punctuation)
78-
clean_text = text.translate(translator).lower()
79-
clean_text = re.sub(r'\s+', ' ', clean_text).strip()
81+
clean_text = re.sub(r'\s+', ' ', text).strip()
8082
return clean_text
8183

82-
def remove_nos(self, text):
83-
text = re.sub(r'\d+', '', text)
84-
return text
85-
8684
def load_slur_list(self):
8785
cache_key = self.severity.value if hasattr(self.severity, "value") else str(self.severity)
8886

0 commit comments

Comments
 (0)