|
| 1 | +#lang racket/base |
| 2 | + |
| 3 | +(require commonmark/parse |
| 4 | + (prefix-in md: commonmark/struct) |
| 5 | + racket/contract |
| 6 | + racket/format |
| 7 | + racket/list |
| 8 | + racket/match |
| 9 | + racket/string |
| 10 | + scribble/base |
| 11 | + scribble/core |
| 12 | + scribble/decode |
| 13 | + scribble/decode-struct |
| 14 | + scribble/html-properties |
| 15 | + threading |
| 16 | + (only-in xml |
| 17 | + cdata |
| 18 | + string->xexpr) |
| 19 | + |
| 20 | + (only-in "lang/post-language.rkt" |
| 21 | + code |
| 22 | + code-block |
| 23 | + footnote-collect-element |
| 24 | + footnote-ref |
| 25 | + footnotes-section |
| 26 | + post-date |
| 27 | + post-tags |
| 28 | + pygments-block)) |
| 29 | + |
| 30 | +(provide (contract-out |
| 31 | + [parse-markdown-post (-> input-port? part?)])) |
| 32 | + |
| 33 | +;; ----------------------------------------------------------------------------- |
| 34 | + |
| 35 | +(define (parse-markdown-post in) |
| 36 | + (parameterize ([current-parse-footnotes? #t]) |
| 37 | + (define title-info (parse-title-info in)) |
| 38 | + (define doc (read-document in)) |
| 39 | + (document->part doc title-info))) |
| 40 | + |
| 41 | +(define (parse-title-info in) |
| 42 | + (match-define (list _ title-bytes) (regexp-match #px"^ Title: ([^\n]+)\n" in)) |
| 43 | + (match-define (list _ year-bytes month-bytes day-bytes) |
| 44 | + (regexp-match #px"^ Date: ([0-9]{4})-([0-9]{2})-([0-9]{2})[^\n]*\n" in)) |
| 45 | + (match-define (list _ tags-bytes) |
| 46 | + (regexp-match #px"^ Tags: ([^,\n]+(?:, [^,\n]+)*)\n\n" in)) |
| 47 | + |
| 48 | + (match-define (md:document (list (md:paragraph title-content)) '()) |
| 49 | + (string->document (bytes->string/utf-8 title-bytes))) |
| 50 | + (define tags (string-split (bytes->string/utf-8 tags-bytes) ", " #:trim? #f)) |
| 51 | + |
| 52 | + (title #:style (style #f (list (post-date (bytes->number year-bytes) |
| 53 | + (bytes->number month-bytes) |
| 54 | + (bytes->number day-bytes)) |
| 55 | + (post-tags tags))) |
| 56 | + (render-inline title-content))) |
| 57 | + |
| 58 | +(define (bytes->number bs) |
| 59 | + (string->number (bytes->string/utf-8 bs))) |
| 60 | + |
| 61 | +;; ----------------------------------------------------------------------------- |
| 62 | + |
| 63 | +(define (document->part doc title-decl) |
| 64 | + (define part-info (part-start 0 |
| 65 | + (title-decl-tag-prefix title-decl) |
| 66 | + (title-decl-tags title-decl) |
| 67 | + (title-decl-style title-decl) |
| 68 | + (title-decl-content title-decl))) |
| 69 | + (match-define-values [main-part '()] (render-part (md:document-blocks doc) part-info)) |
| 70 | + (struct-copy part main-part |
| 71 | + [to-collect (append (render-footnote-definitions (md:document-footnotes doc)) |
| 72 | + (part-to-collect main-part))] |
| 73 | + [parts (append (part-parts main-part) (list (footnotes-section)))])) |
| 74 | + |
| 75 | +(define (render-part blocks part-info) |
| 76 | + (define (collect-initial-blocks initial-blocks blocks) |
| 77 | + (match blocks |
| 78 | + ['() (finish-part (reverse initial-blocks) '() '())] |
| 79 | + [(cons (md:heading _ depth) _) |
| 80 | + (if (> depth (part-start-depth part-info)) |
| 81 | + (collect-sub-parts (reverse initial-blocks) '() blocks) |
| 82 | + (finish-part (reverse initial-blocks) '() blocks))] |
| 83 | + [(cons block blocks) |
| 84 | + (collect-initial-blocks (cons (render-block block) initial-blocks) blocks)])) |
| 85 | + |
| 86 | + (define (collect-sub-parts initial-blocks parts blocks) |
| 87 | + (match blocks |
| 88 | + [(cons (md:heading content depth) blocks) |
| 89 | + #:when (> depth (part-start-depth part-info)) |
| 90 | + (define-values [part blocks*] |
| 91 | + (render-part blocks |
| 92 | + (make-part-info #:title (render-inline content) |
| 93 | + #:depth (add1 (part-start-depth part-info))))) |
| 94 | + (collect-sub-parts initial-blocks (cons part parts) blocks*)] |
| 95 | + [_ |
| 96 | + (finish-part initial-blocks (reverse parts) blocks)])) |
| 97 | + |
| 98 | + (define (finish-part initial-blocks parts blocks) |
| 99 | + (define part-tags (if (empty? (part-start-tags part-info)) |
| 100 | + (list `(part ,(make-generated-tag))) |
| 101 | + (part-start-tags part-info))) |
| 102 | + (define title-content (part-start-title part-info)) |
| 103 | + (values (part (part-start-tag-prefix part-info) |
| 104 | + part-tags |
| 105 | + title-content |
| 106 | + (part-start-style part-info) |
| 107 | + (make-part-to-collect (first part-tags) title-content) |
| 108 | + initial-blocks |
| 109 | + parts) |
| 110 | + blocks)) |
| 111 | + |
| 112 | + (collect-initial-blocks '() blocks)) |
| 113 | + |
| 114 | +(define (render-block block) |
| 115 | + (match block |
| 116 | + [(? md:thematic-break?) |
| 117 | + (paragraph (style #f (list (alt-tag "hr"))) '())] |
| 118 | + [(md:code-block content info-string) |
| 119 | + (define language (and~>> info-string (regexp-match #px"^[^ \t\r\n]+") first)) |
| 120 | + (if language |
| 121 | + (pygments-block content #:language language) |
| 122 | + (code-block content))] |
| 123 | + [(md:html-block content) |
| 124 | + (xexpr->block (string->xexpr content))] |
| 125 | + [(md:paragraph content) |
| 126 | + (paragraph plain (render-inline content))] |
| 127 | + [(md:blockquote blocks) |
| 128 | + (nested-flow (style 'nested '()) (render-flow blocks))] |
| 129 | + [(md:itemization blockss _ start-num) |
| 130 | + (itemization (match start-num |
| 131 | + [#f plain] |
| 132 | + [1 (style 'ordered '())] |
| 133 | + [_ (style 'ordered (list (attributes (list (cons 'start (~a start-num))))))]) |
| 134 | + (map render-flow blockss))])) |
| 135 | + |
| 136 | +(define (render-flow blocks) |
| 137 | + (map render-block blocks)) |
| 138 | + |
| 139 | +(define (render-inline content) |
| 140 | + (match content |
| 141 | + [(? string?) content] |
| 142 | + [(? list?) (render-inlines content)] |
| 143 | + [(? md:line-break?) (linebreak)] |
| 144 | + [(md:bold content) (element 'bold (render-inline content))] |
| 145 | + [(md:italic content) (element 'italic (render-inline content))] |
| 146 | + [(md:code content) (code content)] |
| 147 | + [(md:link content dest _) |
| 148 | + (element (style #f (list (make-target-url dest))) (render-inline content))] |
| 149 | + [(md:image description source title) |
| 150 | + (image-element #f (render-inline description) source '() 1)] |
| 151 | + [(md:html content) |
| 152 | + (raise-arguments-error 'render-inline "unhandled HTML span" "content" content)] |
| 153 | + [(md:footnote-reference label) |
| 154 | + (footnote-ref label)])) |
| 155 | + |
| 156 | +; Hacky special case for some inline HTML that shows up in a couple posts. |
| 157 | +(define (render-inlines contents) |
| 158 | + (match contents |
| 159 | + ['() '()] |
| 160 | + [(cons (md:html "<sup>") contents) |
| 161 | + (define-values [inner-contents contents*] (scan-inlines/html-close contents "</sup>")) |
| 162 | + (cons (superscript (render-inlines inner-contents)) (render-inlines contents*))] |
| 163 | + [(cons (md:html "<code>") contents) |
| 164 | + (define-values [inner-contents contents*] (scan-inlines/html-close contents "</code>")) |
| 165 | + (cons (code (render-inlines inner-contents)) (render-inlines contents*))] |
| 166 | + [(cons content contents) |
| 167 | + (cons (render-inline content) (render-inlines contents))])) |
| 168 | +(define (scan-inlines/html-close contents close-tag) |
| 169 | + (let loop ([inner-contents '()] |
| 170 | + [contents contents]) |
| 171 | + (match contents |
| 172 | + ['() (values (reverse inner-contents) '())] |
| 173 | + [(cons (md:html str) contents) |
| 174 | + #:when (string=? str close-tag) |
| 175 | + (values (reverse inner-contents) contents)] |
| 176 | + [(cons inner-content contents) |
| 177 | + (loop (cons inner-content inner-contents) contents)]))) |
| 178 | + |
| 179 | +(define (xexpr->block xexpr) |
| 180 | + (match xexpr |
| 181 | + [(list (? symbol? tag) (list (list (? symbol? attr-name) (? string? attr-val)) ...) xexprs ...) |
| 182 | + (paragraph (style #f (list (alt-tag (symbol->string tag)) |
| 183 | + (attributes (map cons attr-name attr-val)))) |
| 184 | + (map xexpr->content xexprs))])) |
| 185 | +(define (xexpr->content xexpr) |
| 186 | + (match xexpr |
| 187 | + [(? string?) xexpr] |
| 188 | + [(list (? symbol? tag) (list (list (? symbol? attr-name) (? string? attr-val)) ...) xexpr ...) |
| 189 | + (element (style #f (list (alt-tag (symbol->string tag)) |
| 190 | + (attributes (map cons attr-name attr-val)))) |
| 191 | + (map xexpr->content xexpr))])) |
| 192 | + |
| 193 | +(define (render-footnote-definitions defns) |
| 194 | + (for/list ([defn (in-list defns)]) |
| 195 | + (match-define (md:footnote-definition blocks label) defn) |
| 196 | + (footnote-collect-element label (render-flow blocks)))) |
| 197 | + |
| 198 | +(define (make-part-info #:title title-content #:depth depth) |
| 199 | + (struct-copy part-start (section title-content) [depth depth])) |
| 200 | + |
| 201 | +; Taken from scribble/decode. |
| 202 | +(define (make-part-to-collect tag title-content) |
| 203 | + (list (index-element |
| 204 | + #f '() tag |
| 205 | + (list (clean-up-index-string |
| 206 | + (regexp-replace #px"^\\s+(?:(?:A|An|The)\\s)?" |
| 207 | + (content->string title-content) ""))) |
| 208 | + (list (element #f title-content)) |
| 209 | + (make-part-index-desc)))) |
0 commit comments