-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompile.py
More file actions
80 lines (59 loc) · 2.19 KB
/
compile.py
File metadata and controls
80 lines (59 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os, json, re
import re
def sanitize_id(text):
    """Turn a heading title into a lowercase, dash-separated anchor id.

    After lowercasing, every character other than a-z, 0-9, whitespace and
    hyphens is dropped. Each run of whitespace then becomes a dash, and
    consecutive dashes are collapsed into one (e.g. "a---b" -> "a-b").
    Leading or trailing dashes are not stripped.
    """
    # Applied strictly in this order: drop disallowed characters, turn
    # whitespace runs into dashes, then squeeze repeated dashes.
    substitutions = (
        (r'[^a-z0-9\s-]', ''),
        (r'\s+', '-'),
        (r'-+', '-'),
    )
    result = text.lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result
def parse_markdown_headers(markdown_text):
    """
    Parses a markdown string and extracts headers with level, id, and title.
    Ignores headers found inside fenced code blocks.

    A fenced block opens on a line whose stripped text starts with three or
    more backticks or tildes. It closes only on a later line made of the same
    fence character, at least as many of them, and nothing else. A backtick
    fence inside a tilde block (or a shorter fence inside a longer one)
    therefore does not end the block. An unclosed fence runs to the end of
    the text.

    Args:
        markdown_text: The markdown source as a single string.

    Returns:
        A list of dicts, in document order, each with the keys "level"
        (number of leading hashes, 1-6), "id" (the title passed through
        sanitize_id) and "title" (the header text with surrounding
        whitespace removed).
    """
    headers = []
    # Regex to match a header: 1-6 hashes, whitespace, then the title
    header_pattern = re.compile(r'^(#{1,6})\s+(.+)$')
    # Regex to match a fence line: a run of 3+ backticks or tildes, then the rest
    fence_pattern = re.compile(r'^(`{3,}|~{3,})(.*)$')
    # The fence run that opened the current code block, or None outside a block
    open_fence = None

    for line in markdown_text.split('\n'):
        fence_match = fence_pattern.match(line.strip())

        if open_fence is not None:
            # Inside a code block: only a matching closing fence ends it.
            if fence_match:
                marker, trailing = fence_match.groups()
                if (marker[0] == open_fence[0]
                        and len(marker) >= len(open_fence)
                        and not trailing):
                    open_fence = None
            # Fence lines and block content never contribute headers.
            continue

        if fence_match:
            marker, info = fence_match.groups()
            # A backtick run followed by more backticks on the same line is an
            # inline code span (```like this```), not the start of a block.
            if not (marker[0] == '`' and '`' in info):
                open_fence = marker
                continue

        # Check if the line is a header (matched against the unstripped line,
        # so the hashes must start in the first column).
        match = header_pattern.match(line)
        if match:
            hashes, raw_title = match.groups()
            title = raw_title.strip()
            headers.append({
                "level": len(hashes),
                "id": sanitize_id(title),
                "title": title
            })
    return headers
# Build one page record per visible sub-directory of the current directory.
# Each such directory must contain an index.md and a metadata.json; a missing
# or malformed file raises and aborts the build.
pages = []
with os.scandir('.') as entries:
    # scandir yields entries in arbitrary order; sort by name so that
    # directory.json is reproducible from one run to the next.
    for entry in sorted(entries, key=lambda item: item.name):
        # Skip plain files and hidden directories (e.g. ".git").
        if not entry.is_dir() or entry.name.startswith("."):
            continue

        content_path = os.path.join(entry.path, "index.md")
        with open(content_path, 'r', encoding='utf-8') as content_file:
            content = content_file.read()

        meta_path = os.path.join(entry.path, "metadata.json")
        with open(meta_path, 'r', encoding='utf-8') as meta_file:
            meta = json.loads(meta_file.read())

        # Extend the page's own metadata with the derived fields.
        meta['slug'] = entry.name
        meta['content'] = content
        meta['toc'] = parse_markdown_headers(content)
        pages.append(meta)

data = {
    "pages": pages
}

# Write the combined index next to the page directories.
with open('./directory.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(data, indent=2))