Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/test-image-links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Runs tests/test_image_links.py on every push and pull request. The script
# exits non-zero when the docs link to externally hosted images.
name: Test image links

on:
  pull_request:
  push:

jobs:
  # The job id is displayed next to the workflow name in the checks list,
  # so it is named after what this workflow actually verifies.
  test-image-links:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Run image link check
        # Invoked from the checkout root; the script resolves src/en
        # relative to the current working directory.
        run: python tests/test_image_links.py
72 changes: 72 additions & 0 deletions tests/test_image_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3

# This script checks that no external images are linked.
# Images should instead be self-hosted where possible.

import re
import sys
from pathlib import Path
from urllib.parse import urlparse

# Paths are resolved against the current working directory, so the script
# has to be started from the repository root for the folders below to exist.
REPO_ROOT = Path.cwd()

# Documentation trees that are searched for Markdown files.
DOCS_DIRS = [REPO_ROOT.joinpath("src", "en")]

# Hosts that images may be loaded from. While this list is empty, only
# relative image paths are accepted.
ALLOWED_HOSTS = [
    # "example.com",
]

def _link_destination(raw_target: str) -> str:
    """Return the URL portion of a Markdown link target.

    ``raw_target`` is the text found between the parentheses of
    ``![alt](...)``. It may carry an optional title (``url "title"``) or wrap
    the URL in angle brackets (``<url with spaces>``). Both are removed so
    that only the destination itself is handed to ``urlparse``.
    """
    target = raw_target.strip()
    if target.startswith("<"):
        # Without unwrapping, "<https://host/..." has no recognisable scheme
        # and would be mistaken for a relative path.
        closing = target.find(">")
        return target[1:closing] if closing != -1 else target[1:]
    pieces = target.split(maxsplit=1)
    return pieces[0] if pieces else ""


def _host_is_allowed(parsed, allowed_hosts) -> bool:
    """Tell whether the host of a parsed URL is on the allow-list.

    The bare host name (no port, no ``user@`` prefix) is compared
    case-insensitively. The full ``netloc`` is accepted as well, so an
    allow-list entry that includes a port keeps matching.
    """
    hostname = parsed.hostname or ""
    return hostname in allowed_hosts or parsed.netloc.lower() in allowed_hosts


def main() -> int:
    """Scan the Markdown docs for externally hosted images.

    Every ``*.md`` file below the directories in ``DOCS_DIRS`` is searched
    for inline images (``![alt](url)``). Relative destinations are accepted;
    absolute URLs are accepted only when their host is in ``ALLOWED_HOSTS``.

    Returns:
        0 when no offending link was found, 1 otherwise. The value is meant
        to be used as the process exit code.
    """
    # Inline image syntax: ![alt text](image-url)
    #   \[[^\]]*\]   the alt text: any number of characters that are not a ]
    #   \(([^)]+)\)  captures the link target: at least one character, up to
    #                the closing )
    image_pattern = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')

    # Host names are case-insensitive, so the allow-list is lower-cased once.
    allowed_hosts = {host.lower() for host in ALLOWED_HOSTS}

    docs = []
    for docs_dir in DOCS_DIRS:
        if not docs_dir.is_dir():
            # Usually means the script was started outside the repository
            # root; say so instead of silently checking nothing.
            print(f"warning: {docs_dir} is not a directory, skipping")
            continue
        docs.extend(docs_dir.rglob("*.md"))
    # Sorted so the log and the error report do not depend on filesystem order.
    docs.sort()

    print(f"found {len(docs)} .md files")

    errors = []

    for md_file in docs:
        print(f"checking {md_file}")
        content = md_file.read_text(encoding="utf-8")
        for raw_target in image_pattern.findall(content):
            url = _link_destination(raw_target)

            # Ignore anchors and mailto
            if url.startswith(("#", "mailto:")):
                continue

            parsed = urlparse(url)

            # Relative paths are OK
            if not parsed.scheme and not parsed.netloc:
                continue

            # Absolute URLs must be explicitly allowed
            if not _host_is_allowed(parsed, allowed_hosts):
                errors.append(f"{md_file}: found external image link: {url}")

    if errors:
        print("❌ External image links detected:\n")
        print("\n".join(errors))
        print("Make sure to self-host all images instead of using external hosts like imgur!")
        print("Put them into the assets folder.")
        return 1

    print("✅ All image links are self-hosted.")
    return 0

if __name__ == "__main__":
    # Guarded so that importing this module (e.g. a test runner collecting
    # test_*.py files) does not terminate the process. sys.exit is used
    # because the bare exit() helper is injected by the site module and is
    # intended for interactive sessions.
    sys.exit(main())
2 changes: 2 additions & 0 deletions tests/test_summary.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env python3

# This script checks if all documents in the repository are linked in the table of contents in SUMMARY.md so that they can be reached via the website.

import re
import sys
from pathlib import Path
Expand Down
Loading