diff --git a/commonmark-doc/scribblings/commonmark.scrbl b/commonmark-doc/scribblings/commonmark.scrbl index 5a1c0d9..e52b4cd 100644 --- a/commonmark-doc/scribblings/commonmark.scrbl +++ b/commonmark-doc/scribblings/commonmark.scrbl @@ -68,6 +68,9 @@ @(define-syntax-rule (cm-examples body ...) (examples #:eval (make-commonmark-eval) #:once body ...)) +@(define MediaWiki @hyperlink["https://www.mediawiki.org/"]{MediaWiki}) +@(define WikiLink @hyperlink["https://www.mediawiki.org/wiki/Help:Links#Internal_links"]{WikiLink}) + @defmodule[commonmark]{ The @racketmodname[commonmark] library implements a @|CommonMark|-compliant Markdown parser. Currently, it passes all test cases in @hyperlink["https://spec.commonmark.org/0.30/"]{v0.30 of the specification}. By default, only the Markdown features specified by @CommonMark are supported, but non-standard support for @tech{footnotes} can be optionally enabled; see the @secref{extensions} section of this manual for more details. @@ -145,6 +148,16 @@ Note that the value of @racket[current-parse-footnotes?] only affects parsing, @ @history[#:added "1.1"]} +@defboolparam[current-parse-wikilinks? parse-wikilinks? #:value #f]{ +Enables or disables the parsing of @|WikiLink|s. Specifically, setting this @reftech{parameter} to a value other than @racket[#f] enables the parsing of @MediaWiki internal links and piped links as @tech{wikilink}s. More complex MediaWiki link types, such as the pipe trick or word-ending links, are not supported. + +Importantly, @tech{wikilink} parsing breaks compliance with the @CommonMark specification! + +@(cm-examples + #:label "Example:" + (parameterize ([current-parse-wikilinks? #t]) + (string->document "[[link]] and [[link|label]]")))} + @section[#:tag "rendering-html"]{Rendering HTML} @declare-exporting[commonmark/render/html commonmark] @defmodule[commonmark/render/html #:no-declare]{ @@ -310,7 +323,7 @@ A @deftech{thematic break} is a @tech{block}. 
It is usually rendered as a horizo @defproc[(inline? [v any/c]) boolean?]{ @see-cm[@tech{inline content} @cm-section{Blocks and inlines}] -Returns @racket[#t] if @racket[v] is @deftech{inline content}: a @reftech{string}, @tech{italic span}, @tech{bold span}, @tech{code span}, @tech{link}, @tech{image}, @tech{footnote reference}, @tech{HTML span}, @tech{hard line break}, or @reftech{list} of @tech{inline content}. Otherwise, returns @racket[#f].} +Returns @racket[#t] if @racket[v] is @deftech{inline content}: a @reftech{string}, @tech{italic span}, @tech{bold span}, @tech{code span}, @tech{link}, @tech{wikilink}, @tech{image}, @tech{footnote reference}, @tech{HTML span}, @tech{hard line break}, or @reftech{list} of @tech{inline content}. Otherwise, returns @racket[#f].} @defstruct*[italic ([content inline?]) #:transparent]{ @see-cm[@tech{italic spans} @cm-section{Emphasis and strong emphasis}] @@ -344,6 +357,12 @@ A @tech{footnote reference} is @tech{inline content} that references a @tech{foo @history[#:added "1.1"]} +@defstruct*[wikilink ([content inline?] [dest string?]) #:transparent]{ + +A @deftech{wikilink} is @tech{inline content} that contains nested @tech{inline content} and a link destination. Following the definitions of @|WikiLink|s in @MediaWiki, the content is identical to the link destination in case of internal links. 
+ +In HTML output, a @tech{wikilink} corresponds to an @tt{<a>} element.} + @defstruct*[html ([content string?]) #:transparent]{ @see-cm[@tech{HTML spans} @cm-section{Raw HTML}] diff --git a/commonmark-doc/scribblings/commonmark/private/scribble-render.rkt b/commonmark-doc/scribblings/commonmark/private/scribble-render.rkt index 2ccad49..64cb6cf 100644 --- a/commonmark-doc/scribblings/commonmark/private/scribble-render.rkt +++ b/commonmark-doc/scribblings/commonmark/private/scribble-render.rkt @@ -126,6 +126,8 @@ (link-element #f _ (footnote-definition-tag label)) (target-element #f _ (footnote-reference-tag label ref-num)) (element 'superscript))) + (define/override (render-wikilink content dest) + (element (style #f (list (make-target-url dest))) content)) (define/override (render-footnote-definition blocks label ref-count) (define multiple-refs? (> ref-count 1)) diff --git a/commonmark-lib/commonmark/parse.rkt b/commonmark-lib/commonmark/parse.rkt index f4e2b90..d9a495c 100644 --- a/commonmark-lib/commonmark/parse.rkt +++ b/commonmark-lib/commonmark/parse.rkt @@ -2,10 +2,12 @@ (require racket/contract "private/struct.rkt" - "private/parse/block.rkt") + "private/parse/block.rkt" + "private/parse/inline.rkt") (provide (contract-out [read-document (-> input-port? document?)] [string->document (-> string? document?)] - [current-parse-footnotes? (parameter/c any/c boolean?)])) + [current-parse-footnotes? (parameter/c any/c boolean?)] + [current-parse-wikilinks? (parameter/c any/c boolean?)])) diff --git a/commonmark-lib/commonmark/private/parse/inline.rkt b/commonmark-lib/commonmark/private/parse/inline.rkt index 3255b0d..e705074 100644 --- a/commonmark-lib/commonmark/private/parse/inline.rkt +++ b/commonmark-lib/commonmark/private/parse/inline.rkt @@ -10,6 +10,7 @@ "common.rkt") (provide string->inline + current-parse-wikilinks? 
(struct-out link-reference)) ;; ----------------------------------------------------------------------------- @@ -84,7 +85,30 @@ be wrong for multibyte characters. Fortunately, enabling line counting has the convenient side effect of tracking positions in characters rather than bytes, which explains why we need to call -`port-count-lines!` even though we never actually use line information. |# +`port-count-lines!` even though we never actually use line information. + + +Note [Nested WikiLinks] +~~~~~~~~~~~~~~~~~~~~~~~ +How should we handle nested WikiLinks, such as [[this [[nested]] example]]? + +MediaWiki, the engine behind Wikipedia, renders the above example as: + +

[[this <a>nested</a> example]]

+ +In other words, MediaWiki avoids nested links by only considering the inner-most +link. This is consistent with the handling of nested links in CommonMark. + +Other parsers handle nested WikiLinks differently. Obsidian, for instance, +matches double-brackets "greedily" and parses the above example as + 

<a>this [[nested</a> example]]

. + +Our implementation attempts to match the behavior of MediaWiki. However, we do +not (yet) handle more complex MediaWiki features, such as the "pipe trick". + |# + +(define current-parse-wikilinks? (make-parameter #f (λ (x) (and x #t)))) (struct link-reference (dest title) #:transparent) @@ -118,6 +142,31 @@ positions in characters rather than bytes, which explains why we need to call [(or (? eof-object?) 'link-close) (values '() node last-char #f)] + ['wikilink-open + (define open-text "[[") + (match-define-values [_ _ open-pos] (port-next-location in)) + (let loop ([last-char last-char] [nodes '()] [has-link? #f]) + (define-values [nodes* closer* last-char* has-link?*] (read-sequence last-char)) + (match closer* + [(? eof-object?) + (values (cons open-text (append nodes nodes*)) closer* last-char* has-link?*)] + ['link-close + (cond + [(eqv? (peek-char in) #\]) + (read-char in) + (cond + [(or has-link? has-link?*) + (match-define-values [nodes** closer** last-char** _] (read-sequence last-char*)) + (values (cons open-text (append nodes nodes* (cons "]]" nodes**))) closer** last-char** #t)] + [else + (match-define-values [_ _ close-pos] (port-next-location in)) + (define inner-text (substring str (sub1 open-pos) (- close-pos 3))) + (define node (parse-wikilink inner-text)) + (define-values [nodes** closer** last-char** has-link?**] (read-sequence last-char*)) + (values (cons node nodes**) closer** last-char** (or (wikilink? node) has-link?**))])] + [else + (loop last-char* (append nodes nodes* '("]")) (or has-link? has-link?*))])]))] + [(or 'link-open 'image-open) (define image? (eq? node 'image-open)) (define open-text (if image? "![" "[")) @@ -254,8 +303,16 @@ positions in characters rather than bytes, which explains why we need to call (values (delimiter-run c len len opener? closer?) c)] - ;; § 6.3 Links - [#\[ (read-char in) (values 'link-open #\[)] + ;; § 6.3 Links (and WikiLinks) + [#\[ + (read-char in) + (cond + [(and (current-parse-wikilinks?) 
+ (eqv? (peek-char in) #\[)) + (read-char in) + (values 'wikilink-open #\[)] + [else + (values 'link-open #\[)])] [#\] (read-char in) (values 'link-close #\])] [#\! @@ -310,6 +367,37 @@ positions in characters rather than bytes, which explains why we need to call "next char" (peek-char in) "expected regexp" rx)])) + (define (parse-wikilink inner-text) + (define (process-label label-text) + (define label-node + (string->inline label-text + #:link-defns #hash() + #:footnote-defns #hash())) + (process-emphasis (list label-node))) + + (define (string-split-on-first str char) + (define char-pos + (for/first + ([p (in-naturals)] + [c (in-string str)] + #:when (char=? char c)) + p)) + + (if char-pos + (let ([head (substring str 0 char-pos)] + [tail (substring str (add1 char-pos))]) + (cons head tail)) + (cons str ""))) + + (match (string-split-on-first inner-text #\|) + ; "[[]]" and "[[|Link]]" are not parsed as links. + [(cons "" _) (string-append "[[" inner-text "]]")] + ; "[[Link]]" and "[[Link|]]" are parsed as links with target and label "Link". + [(cons target+label "") (wikilink (process-label target+label) target+label)] + ; "[[Target|Label]]" is parsed as link with target "Target" and label "Label". + ; "[[Target|La|bel]]" is parsed as link with target "Target" and label "La|bel". 
+ [(cons target label) (wikilink (process-label label) target)])) + (define (try-read-link-target content-label-str) (or ;; Full reference links diff --git a/commonmark-lib/commonmark/private/render.rkt b/commonmark-lib/commonmark/private/render.rkt index 3ac8998..d5fa110 100644 --- a/commonmark-lib/commonmark/private/render.rkt +++ b/commonmark-lib/commonmark/private/render.rkt @@ -86,6 +86,7 @@ render-image render-html render-footnote-reference + render-wikilink render-footnote-definition) @@ -156,7 +157,9 @@ (render-html content)] [(footnote-reference label) (match-define (footnote-info defn-num ref-num) (resolve-footnote-reference label)) - (render-footnote-reference label defn-num ref-num)])))) + (render-footnote-reference label defn-num ref-num)] + [(wikilink content dest) + (render-wikilink (render-inline content) dest)])))) (define/public (render-inlines contents) (for*/list ([content (in-list contents)] diff --git a/commonmark-lib/commonmark/private/struct.rkt b/commonmark-lib/commonmark/private/struct.rkt index 5806925..0858f62 100644 --- a/commonmark-lib/commonmark/private/struct.rkt +++ b/commonmark-lib/commonmark/private/struct.rkt @@ -20,7 +20,8 @@ (struct-out link) (struct-out image) (struct-out html) - (struct-out footnote-reference)) + (struct-out footnote-reference) + (struct-out wikilink)) ;; ----------------------------------------------------------------------------- @@ -57,7 +58,8 @@ (link? v) (image? v) (html? v) - (footnote-reference? v))) + (footnote-reference? v) + (wikilink? v))) (define-values [line-break line-break?] 
(let () @@ -70,3 +72,4 @@ (struct image (description source title) #:transparent) (struct html (content) #:transparent) (struct footnote-reference (label) #:transparent) +(struct wikilink (content dest) #:transparent) diff --git a/commonmark-lib/commonmark/render/html.rkt b/commonmark-lib/commonmark/render/html.rkt index fd44525..5c536b6 100644 --- a/commonmark-lib/commonmark/render/html.rkt +++ b/commonmark-lib/commonmark/render/html.rkt @@ -98,6 +98,8 @@ (a ([id ,(footnote-reference-anchor label ref-num)] [href ,(~a "#" (footnote-definition-anchor (uri-path-segment-encode label)))]) ,(~a defn-num)))) + (define/override (render-wikilink content dest) + `(a ([href ,dest]) ,@content)) (define/override (render-footnote-definition blocks label ref-count) (define encoded-label (uri-path-segment-encode label)) diff --git a/commonmark-lib/commonmark/struct.rkt b/commonmark-lib/commonmark/struct.rkt index 252553c..e0aa072 100644 --- a/commonmark-lib/commonmark/struct.rkt +++ b/commonmark-lib/commonmark/struct.rkt @@ -26,4 +26,5 @@ (struct link ([content inline?] [dest string?] [title (or/c string? #f)])) (struct image ([description inline?] [source string?] [title (or/c string? #f)])) (struct html ([content string?])) - (struct footnote-reference ([label string?])))) + (struct footnote-reference ([label string?])) + (struct wikilink ([content inline?] [dest string?])))) diff --git a/commonmark-test/tests/commonmark/parse/wikilink.rkt b/commonmark-test/tests/commonmark/parse/wikilink.rkt new file mode 100644 index 0000000..9910610 --- /dev/null +++ b/commonmark-test/tests/commonmark/parse/wikilink.rkt @@ -0,0 +1,19 @@ +#lang racket/base + +(require commonmark + commonmark/struct + rackunit) + +(parameterize ([current-parse-wikilinks? #t]) + (check-equal? (string->document "[[example]]") + (document (list (paragraph (wikilink "example" "example"))) '())) + (check-equal? 
(string->document "[[destination|label]]") + (document (list (paragraph (wikilink "label" "destination"))) '())) + (check-equal? (string->document "[[destination|label with **bold** markup]]") + (document (list (paragraph (wikilink (list "label with " (bold "bold") " markup") "destination"))) '())) + (check-equal? (string->document "[[lorem [[link]] ipsum]]") + (document (list (paragraph (list "[[lorem " (wikilink "link" "link") " ipsum]]"))) '())) + (check-equal? (string->document "[[unclosed") + (document (list (paragraph "[[unclosed")) '())) + (check-equal? (string->document "[[]]") + (document (list (paragraph "[[]]")) '()))) \ No newline at end of file