Skip to content

Commit def9098

Browse files
Fix resource parsing errors and some smaller things (#144)
* Fix resource parsing errors and some smaller things
* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 9a7c5d1 commit def9098

File tree

3 files changed

+28
-7
lines changed

3 files changed

+28
-7
lines changed

src/models/europython.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -241,39 +241,59 @@ def _clean_social_input(text: str) -> str | None:
241241
removes "http://" or "https://",
242242
removes "www." prefix,
243243
removes "@" prefix,
244+
removes invisible Unicode control characters,
244245
and decodes URL-encoded characters.
245246
"""
246247
if EuroPythonSpeaker._is_blank_or_na(text):
247248
print(f"Blank or N/A input: {text}")
248249
return None
249250

251+
# Strip leading/trailing whitespace
250252
text = text.strip()
251253

252-
# Handle inputs like "LinkedIn: https://linkedin.com/in/username"
253-
# or "GH: https://github.com/username"
254+
# Remove any text prefix like "LinkedIn: " or "GH: "
254255
text = text.split(" ", 1)[1] if ": " in text else text
255256

257+
# Remove query strings and trailing commas or slashes
256258
text = text.split("?", 1)[0]
257259
text = text.split(",", 1)[0]
258260
text = text.rstrip("/")
259261

262+
# Remove URL schemes
260263
if text.startswith("https://"):
261264
text = text[8:]
262265
elif text.startswith("http://"):
263266
text = text[7:]
264267

268+
# Remove "www." prefix
265269
if text.startswith("www."):
266270
text = text[4:]
267271

268-
# Remove @ if present
272+
# Remove leading @
269273
if text.startswith("@"):
270274
text = text[1:]
271275

272-
# Percent-encode non-ASCII characters
276+
# Remove invisible Unicode control characters (Bidi, LTR/RTL marks, etc.)
277+
invisible_chars = [
278+
"\u200e",
279+
"\u200f", # LTR / RTL marks
280+
"\u202a",
281+
"\u202b",
282+
"\u202c",
283+
"\u202d",
284+
"\u202e", # Directional overrides
285+
"\u2066",
286+
"\u2067",
287+
"\u2068",
288+
"\u2069", # Isolates
289+
]
290+
text = re.sub(f"[{''.join(invisible_chars)}]", "", text)
291+
292+
# Percent-encode if needed (e.g., non-ASCII chars)
273293
if not text.isascii():
274294
text = quote(text, safe="@/-_.+~#=:")
275295

276-
return text.lower()
296+
return text.lower() if text else None
277297

278298

279299
class EuroPythonSession(BaseModel):
@@ -292,7 +312,7 @@ class EuroPythonSession(BaseModel):
292312
duration: str = ""
293313
level: str = ""
294314
delivery: str = ""
295-
resources: list[dict[str, str]] | None = None
315+
resources: list[dict[str, str | None]] | None = None
296316
room: str | None = None
297317
start: datetime | None = None
298318
end: datetime | None = None

src/models/pretalx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class PretalxSubmission(BaseModel):
6363
state: SubmissionState
6464
abstract: str = ""
6565
duration: str = ""
66-
resources: list[dict[str, str]] | None = None
66+
resources: list[dict[str, str | None]] | None = None
6767
answers: list[PretalxAnswer]
6868
slots: list[PretalxSlot] = Field(default_factory=list, exclude=True)
6969
slot_count: int = Field(..., exclude=True)

tests/test_extract_socials.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def test_extract_linkedin_url(input_string, result):
6565
@pytest.mark.parametrize(
6666
("input_string", "result"),
6767
[
68+
("@user.dev", "https://bsky.app/profile/user.dev"),
6869
("user123", "https://bsky.app/profile/user123.bsky.social"),
6970
("@user123", "https://bsky.app/profile/user123.bsky.social"),
7071
("user123.bsky.social", "https://bsky.app/profile/user123.bsky.social"),

0 commit comments

Comments
 (0)