Skip to content

Commit 04653d3

Browse files
committed
Use commonmark-lib instead of handrolling a markdown parser
1 parent c217346 commit 04653d3

File tree

9 files changed

+216
-782
lines changed

9 files changed

+216
-782
lines changed

blog/build.rkt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"build/render/feed.rkt"
2222
"build/render/page.rkt"
2323
"lang/metadata.rkt"
24-
"markdown/post.rkt"
24+
"markdown.rkt"
2525
"paths.rkt")
2626

2727
(define num-posts-per-page 10)
@@ -55,7 +55,7 @@
5555

5656
(define (markdown-post file-name)
5757
(define path (build-path posts-dir file-name))
58-
(define main-part-promise (delay (~> (parse-markdown-post (file->string path) path)
58+
(define main-part-promise (delay (~> (call-with-input-file* path parse-markdown-post)
5959
ensure-top-tag
6060
(set-blog-tag-prefix file-name))))
6161
(post-dep path main-part-promise))

blog/build/render/scribble.rkt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -342,10 +342,10 @@
342342

343343
; target-url needs an 'a wrapper
344344
(define link-wrap
345-
(match (style-properties style)
346-
[(list _ ... (target-url target) _ ...)
345+
(match (findf target-url? (style-properties style))
346+
[(target-url target)
347347
(cons 'a (hasheq 'href target))]
348-
[_ #f]))
348+
[#f #f]))
349349

350350
; certain symbolic styles need wrappers
351351
(define style-name-wrap
@@ -475,7 +475,7 @@
475475
(define title-str (content->string (strip-aux (part-title-content part)) this part ri))
476476
(define title-content (render-content (part-title-content part) part ri))
477477
(define body-content (append (render-flow (part-blocks part) part ri #t)
478-
(append-map (λ~> (render-part ri)) (part-parts part))))
478+
(append-map (λ~> (render-part ri)) (part-parts part))))
479479
(define post
480480
(rendered-post title-str
481481
title-content

blog/info.rkt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44

55
(define deps
66
'("base"
7-
"functional-lib"
8-
"megaparsack-lib"
7+
["commonmark-lib" #:version "1.1"]
98
"racket-index"
109
"scribble-lib"
1110
"threading-lib"

blog/markdown.rkt

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#lang racket/base
2+
3+
(require commonmark/parse
4+
(prefix-in md: commonmark/struct)
5+
racket/contract
6+
racket/format
7+
racket/list
8+
racket/match
9+
racket/string
10+
scribble/base
11+
scribble/core
12+
scribble/decode
13+
scribble/decode-struct
14+
scribble/html-properties
15+
threading
16+
(only-in xml
17+
cdata
18+
string->xexpr)
19+
20+
(only-in "lang/post-language.rkt"
21+
code
22+
code-block
23+
footnote-collect-element
24+
footnote-ref
25+
footnotes-section
26+
post-date
27+
post-tags
28+
pygments-block))
29+
30+
(provide (contract-out
31+
[parse-markdown-post (-> input-port? part?)]))
32+
33+
;; -----------------------------------------------------------------------------
34+
35+
(define (parse-markdown-post in)
36+
(parameterize ([current-parse-footnotes? #t])
37+
(define title-info (parse-title-info in))
38+
(define doc (read-document in))
39+
(document->part doc title-info)))
40+
41+
(define (parse-title-info in)
42+
(match-define (list _ title-bytes) (regexp-match #px"^ Title: ([^\n]+)\n" in))
43+
(match-define (list _ year-bytes month-bytes day-bytes)
44+
(regexp-match #px"^ Date: ([0-9]{4})-([0-9]{2})-([0-9]{2})[^\n]*\n" in))
45+
(match-define (list _ tags-bytes)
46+
(regexp-match #px"^ Tags: ([^,\n]+(?:, [^,\n]+)*)\n\n" in))
47+
48+
(match-define (md:document (list (md:paragraph title-content)) '())
49+
(string->document (bytes->string/utf-8 title-bytes)))
50+
(define tags (string-split (bytes->string/utf-8 tags-bytes) ", " #:trim? #f))
51+
52+
(title #:style (style #f (list (post-date (bytes->number year-bytes)
53+
(bytes->number month-bytes)
54+
(bytes->number day-bytes))
55+
(post-tags tags)))
56+
(render-inline title-content)))
57+
58+
(define (bytes->number bs)
59+
(string->number (bytes->string/utf-8 bs)))
60+
61+
;; -----------------------------------------------------------------------------
62+
63+
(define (document->part doc title-decl)
64+
(define part-info (part-start 0
65+
(title-decl-tag-prefix title-decl)
66+
(title-decl-tags title-decl)
67+
(title-decl-style title-decl)
68+
(title-decl-content title-decl)))
69+
(match-define-values [main-part '()] (render-part (md:document-blocks doc) part-info))
70+
(struct-copy part main-part
71+
[to-collect (append (render-footnote-definitions (md:document-footnotes doc))
72+
(part-to-collect main-part))]
73+
[parts (append (part-parts main-part) (list (footnotes-section)))]))
74+
75+
(define (render-part blocks part-info)
76+
(define (collect-initial-blocks initial-blocks blocks)
77+
(match blocks
78+
['() (finish-part (reverse initial-blocks) '() '())]
79+
[(cons (md:heading _ depth) _)
80+
(if (> depth (part-start-depth part-info))
81+
(collect-sub-parts (reverse initial-blocks) '() blocks)
82+
(finish-part (reverse initial-blocks) '() blocks))]
83+
[(cons block blocks)
84+
(collect-initial-blocks (cons (render-block block) initial-blocks) blocks)]))
85+
86+
(define (collect-sub-parts initial-blocks parts blocks)
87+
(match blocks
88+
[(cons (md:heading content depth) blocks)
89+
#:when (> depth (part-start-depth part-info))
90+
(define-values [part blocks*]
91+
(render-part blocks
92+
(make-part-info #:title (render-inline content)
93+
#:depth (add1 (part-start-depth part-info)))))
94+
(collect-sub-parts initial-blocks (cons part parts) blocks*)]
95+
[_
96+
(finish-part initial-blocks (reverse parts) blocks)]))
97+
98+
(define (finish-part initial-blocks parts blocks)
99+
(define part-tags (if (empty? (part-start-tags part-info))
100+
(list `(part ,(make-generated-tag)))
101+
(part-start-tags part-info)))
102+
(define title-content (part-start-title part-info))
103+
(values (part (part-start-tag-prefix part-info)
104+
part-tags
105+
title-content
106+
(part-start-style part-info)
107+
(make-part-to-collect (first part-tags) title-content)
108+
initial-blocks
109+
parts)
110+
blocks))
111+
112+
(collect-initial-blocks '() blocks))
113+
114+
(define (render-block block)
115+
(match block
116+
[(? md:thematic-break?)
117+
(paragraph (style #f (list (alt-tag "hr"))) '())]
118+
[(md:code-block content info-string)
119+
(define language (and~>> info-string (regexp-match #px"^[^ \t\r\n]+") first))
120+
(if language
121+
(pygments-block content #:language language)
122+
(code-block content))]
123+
[(md:html-block content)
124+
(xexpr->block (string->xexpr content))]
125+
[(md:paragraph content)
126+
(paragraph plain (render-inline content))]
127+
[(md:blockquote blocks)
128+
(nested-flow (style 'nested '()) (render-flow blocks))]
129+
[(md:itemization blockss _ start-num)
130+
(itemization (match start-num
131+
[#f plain]
132+
[1 (style 'ordered '())]
133+
[_ (style 'ordered (list (attributes (list (cons 'start (~a start-num))))))])
134+
(map render-flow blockss))]))
135+
136+
(define (render-flow blocks)
137+
(map render-block blocks))
138+
139+
(define (render-inline content)
140+
(match content
141+
[(? string?) content]
142+
[(? list?) (render-inlines content)]
143+
[(? md:line-break?) (linebreak)]
144+
[(md:bold content) (element 'bold (render-inline content))]
145+
[(md:italic content) (element 'italic (render-inline content))]
146+
[(md:code content) (code content)]
147+
[(md:link content dest _)
148+
(element (style #f (list (make-target-url dest))) (render-inline content))]
149+
[(md:image description source title)
150+
(image-element #f (render-inline description) source '() 1)]
151+
[(md:html content)
152+
(raise-arguments-error 'render-inline "unhandled HTML span" "content" content)]
153+
[(md:footnote-reference label)
154+
(footnote-ref label)]))
155+
156+
; Hacky special case for some inline HTML that shows up in a couple posts.
157+
(define (render-inlines contents)
158+
(match contents
159+
['() '()]
160+
[(cons (md:html "<sup>") contents)
161+
(define-values [inner-contents contents*] (scan-inlines/html-close contents "</sup>"))
162+
(cons (superscript (render-inlines inner-contents)) (render-inlines contents*))]
163+
[(cons (md:html "<code>") contents)
164+
(define-values [inner-contents contents*] (scan-inlines/html-close contents "</code>"))
165+
(cons (code (render-inlines inner-contents)) (render-inlines contents*))]
166+
[(cons content contents)
167+
(cons (render-inline content) (render-inlines contents))]))
168+
(define (scan-inlines/html-close contents close-tag)
169+
(let loop ([inner-contents '()]
170+
[contents contents])
171+
(match contents
172+
['() (values (reverse inner-contents) '())]
173+
[(cons (md:html str) contents)
174+
#:when (string=? str close-tag)
175+
(values (reverse inner-contents) contents)]
176+
[(cons inner-content contents)
177+
(loop (cons inner-content inner-contents) contents)])))
178+
179+
(define (xexpr->block xexpr)
180+
(match xexpr
181+
[(list (? symbol? tag) (list (list (? symbol? attr-name) (? string? attr-val)) ...) xexprs ...)
182+
(paragraph (style #f (list (alt-tag (symbol->string tag))
183+
(attributes (map cons attr-name attr-val))))
184+
(map xexpr->content xexprs))]))
185+
(define (xexpr->content xexpr)
186+
(match xexpr
187+
[(? string?) xexpr]
188+
[(list (? symbol? tag) (list (list (? symbol? attr-name) (? string? attr-val)) ...) xexpr ...)
189+
(element (style #f (list (alt-tag (symbol->string tag))
190+
(attributes (map cons attr-name attr-val))))
191+
(map xexpr->content xexpr))]))
192+
193+
(define (render-footnote-definitions defns)
194+
(for/list ([defn (in-list defns)])
195+
(match-define (md:footnote-definition blocks label) defn)
196+
(footnote-collect-element label (render-flow blocks))))
197+
198+
(define (make-part-info #:title title-content #:depth depth)
199+
(struct-copy part-start (section title-content) [depth depth]))
200+
201+
; Taken from scribble/decode.
202+
(define (make-part-to-collect tag title-content)
203+
(list (index-element
204+
#f '() tag
205+
(list (clean-up-index-string
206+
(regexp-replace #px"^\\s+(?:(?:A|An|The)\\s)?"
207+
(content->string title-content) "")))
208+
(list (element #f title-content))
209+
(make-part-index-desc))))

0 commit comments

Comments
 (0)