@@ -241,39 +241,59 @@ def _clean_social_input(text: str) -> str | None:
241
241
removes "http://" or "https://",
242
242
removes "www." prefix,
243
243
removes "@" prefix,
244
+ removes invisible Unicode control characters,
244
245
and decodes URL-encoded characters.
245
246
"""
246
247
if EuroPythonSpeaker ._is_blank_or_na (text ):
247
248
print (f"Blank or N/A input: { text } " )
248
249
return None
249
250
251
+ # Strip leading/trailing whitespace
250
252
text = text .strip ()
251
253
252
- # Handle inputs like "LinkedIn: https://linkedin.com/in/username"
253
- # or "GH: https://github.com/username"
254
+ # Remove any text prefix like "LinkedIn: " or "GH: "
254
255
text = text .split (" " , 1 )[1 ] if ": " in text else text
255
256
257
+ # Remove query strings and trailing commas or slashes
256
258
text = text .split ("?" , 1 )[0 ]
257
259
text = text .split ("," , 1 )[0 ]
258
260
text = text .rstrip ("/" )
259
261
262
+ # Remove URL schemes
260
263
if text .startswith ("https://" ):
261
264
text = text [8 :]
262
265
elif text .startswith ("http://" ):
263
266
text = text [7 :]
264
267
268
+ # Remove "www." prefix
265
269
if text .startswith ("www." ):
266
270
text = text [4 :]
267
271
268
- # Remove @ if present
272
+ # Remove leading @
269
273
if text .startswith ("@" ):
270
274
text = text [1 :]
271
275
272
- # Percent-encode non-ASCII characters
276
+ # Remove invisible Unicode control characters (Bidi, LTR/RTL marks, etc.)
277
+ invisible_chars = [
278
+ "\u200e " ,
279
+ "\u200f " , # LTR / RTL marks
280
+ "\u202a " ,
281
+ "\u202b " ,
282
+ "\u202c " ,
283
+ "\u202d " ,
284
+ "\u202e " , # Directional overrides
285
+ "\u2066 " ,
286
+ "\u2067 " ,
287
+ "\u2068 " ,
288
+ "\u2069 " , # Isolates
289
+ ]
290
+ text = re .sub (f"[{ '' .join (invisible_chars )} ]" , "" , text )
291
+
292
+ # Percent-encode if needed (e.g., non-ASCII chars)
273
293
if not text .isascii ():
274
294
text = quote (text , safe = "@/-_.+~#=:" )
275
295
276
- return text .lower ()
296
+ return text .lower () if text else None
277
297
278
298
279
299
class EuroPythonSession (BaseModel ):
@@ -292,7 +312,7 @@ class EuroPythonSession(BaseModel):
292
312
duration : str = ""
293
313
level : str = ""
294
314
delivery : str = ""
295
- resources : list [dict [str , str ]] | None = None
315
+ resources : list [dict [str , str | None ]] | None = None
296
316
room : str | None = None
297
317
start : datetime | None = None
298
318
end : datetime | None = None
0 commit comments