@@ -38,21 +38,25 @@ def __init__(
3838 super ().__init__ (on_fail = on_fail , search_words = self .slur_list )
3939
4040 def _validate (self , value : str , metadata : dict = None ) -> ValidationResult :
41+ translator = str .maketrans ('' , '' , string .punctuation )
42+
4143 value = self .remove_emojis (value )
42- value = self .remove_nos (value )
4344 value = self .clean_text (value )
4445 words = value .split ()
4546 detected_slurs = []
4647
47- for slur in self . slur_list :
48- if slur in words :
49- if slur not in detected_slurs :
50- detected_slurs . append ( slur )
48+ clean_words = [
49+ w . translate ( translator ). lower ()
50+ for w in words
51+ ]
5152
52- if len (detected_slurs ) > 0 :
53- for word in words :
54- if word in detected_slurs :
55- value = re .sub (rf'\b{ re .escape (word )} \b' , "[REDACTED_SLUR]" , value , flags = re .IGNORECASE )
53+ detected_slurs = list (
54+ set (clean_words ) & set (self .slur_list )
55+ )
56+
57+ for slur in detected_slurs :
58+ pattern = rf'\b{ re .escape (slur )} \b'
59+ value = re .sub (pattern , "[REDACTED_SLUR]" , value , flags = re .IGNORECASE )
5660
5761 if len (detected_slurs ) > 0 :
5862 return FailResult (
@@ -74,15 +78,9 @@ def remove_emojis(self, text):
7478
7579 def clean_text (self , text ):
7680 text = self .normalize_text (text )
77- translator = str .maketrans ('' , '' , string .punctuation )
78- clean_text = text .translate (translator ).lower ()
79- clean_text = re .sub (r'\s+' , ' ' , clean_text ).strip ()
81+ clean_text = re .sub (r'\s+' , ' ' , text ).strip ()
8082 return clean_text
8183
82- def remove_nos (self , text ):
83- text = re .sub (r'\d+' , '' , text )
84- return text
85-
8684 def load_slur_list (self ):
8785 cache_key = self .severity .value if hasattr (self .severity , "value" ) else str (self .severity )
8886
0 commit comments