diff --git a/notebooks/parsing_extensibility.clj b/notebooks/parsing_extensibility.clj index 5bb29602..01a4e17e 100644 --- a/notebooks/parsing_extensibility.clj +++ b/notebooks/parsing_extensibility.clj @@ -2,11 +2,15 @@ (ns parsing-extensibility {:nextjournal.clerk/toc :collapsed :nextjournal.clerk/no-cache true} - (:require [nextjournal.clerk :as clerk] - [nextjournal.markdown :as md] - [nextjournal.markdown.parser :as md.parser] - [edamame.core :as edamame] - [clojure.string :as str])) + (:require + [clojuer.java.io :as io] + [clojure.core.async :as async] + [clojure.string :as str] + [edamame.core :as edamame] + [nextjournal.clerk :as clerk] + [nextjournal.markdown :as md] + [nextjournal.markdown.parser :as md.parser] + [nextjournal.markdown.transform :as md.transform])) ^{:nextjournal.clerk/visibility {:code :hide :result :hide}} (def show-text @@ -31,12 +35,120 @@ (md.parser/tokenize-text-node internal-link-tokenizer {} {:text "some [[set]] of [[wiki]] link"}) - ;; In order to opt-in of the extra tokenization above, we need to configure the document context as follows: (md/parse (update md.parser/empty-doc :text-tokenizers conj internal-link-tokenizer) "some [[set]] of [[wiki]] link") ;; We provide an `internal-link-tokenizer` as well as a `hashtag-tokenizer` as part of the `nextjournal.markdown.parser` namespace. By default, these are not used during parsing and need to be opted-in for like explained above. +;; +;; ### Example #2: custom emoji +;; +;; Another, sligthly more complex example is extending the Hiccup transformer, +;; so that we can add emojis (like :smile:) to the text. +;; +;; Assuming we have multiple emoji packs installed, we need to first build a data +;; structure to hold the metadata about the emoji, like this: +'([:memes [{:name "bonk" + :path "img/emoji/memes/bonk.png"}]] + [:yellow_ball [{:name "yb-sunglasses" + :path "img/emoji/yellow_ball/yb-sunglasses.png"}]]) + +(defonce emoji-dir "resources/public/img/emotes") + +(defn scan-emoji-dir + "Scans the emoji directory and returns a map of emoji info. + The map is a vector of composite map vectors" + [] + (let [root (io/file emoji-dir) + packs (file-seq root)] + (mapcat (fn [pack] + (when (.isDirectory pack) + (let [pack-name (.getName pack) + files (file-seq pack) + emoji-names (mapv (fn [file] + (-> file + .getName + (str/split #"\." 2) ; trim the extension naively + first)) + files) + ;; you can remove str/replace call here if it won't cause issues with + ;; your public resources path not being accessible to the client + paths (mapv #(str/replace (.getPath %) #"resources/public" "") files)] + (assoc {} (keyword pack-name) (mapv #(assoc {} :name % :path %2) emoji-names paths))))) + packs))) + +;; avoid re-scanning the directory on every request +(def emoji (delay (scan-emoji-dir))) + +;; Next, we need to look up our emoji data we just grabbed. +;; Now, depending on whether you want better performance and less risk +;; of conflicts, you might want to require fully qualified emoji names, +;; at the expense of some user inconvenience. +;; This function makes that configurable +(def unqualified-emoji-names? true) + +(defn find-emoji-info + "Looks up the emoji info for an emoji token. + If unqualified names are enabled in the settings, will + sift through all packs. Discards duplicates." + [emoji-token] + (let [[pack name] (str/split emoji-token #"\." 2) + emoji-map @emoji] + (if unqualified-emoji-names? + (->> emoji-map + vals + (apply concat) + (filter #(= emoji-token (:name %))) + first) + (get-in emoji-map [(keyword pack) name])))) + +;; Finally, we can create our handler and renderer +(defn emoji-handler [match] + (let [emoji-name (second match) + emote-info (find-emoji-info emoji-name)] + (if emote-info + {:type :emoji + :tag :img + :attrs {:src (:path emote-info) + :alt (:name emote-info) + :style {:max-width "2.5rem"}}} + {:type :text + :text (str ":" emoji-name ":")}))) + +(def ^:private emoji-tokenizer + (md.parser/normalize-tokenizer + {:regex #":([a-zA-Z0-9_\-.]+):" + :handler emoji-handler})) + +;; Assuming we're using Tailwind CSS. +;; Otherwise, add appropriate styles in `emoji-handler` +(defn emoji-renderer + [ctx node] + (let [params (:attrs node)] + [:img.inline-flex params])) + +;; Finally, we can try to offset the performance hit of the emoji lookup by adding some +;; asynchrony and timeouts. It's up to you to add spinners, error messages, +;; fallback values etc. to your Hiccup template. +(defn parse-message + "Parses message's formatting (extended markdown, see docs) + and returns a rum template" + [message] + (async/go + (try + (let [parsing-chan (async/go (md.parser/parse + ;; add the emoji tokenizer to text tokenizers + (update md.parser/empty-doc :text-tokenizers conj emoji-tokenizer) + (md/tokenize message))) + timeout-chan (async/timeout 2000) + ;; add the renderer to the default renderers map + renderers (assoc md.transform/default-hiccup-renderers :emoji emoji-renderer) + [result port] (async/alts! [parsing-chan timeout-chan])] + (if (= port timeout-chan) + {:error "Parsing timed out"} + (md.transform/->hiccup renderers result))) + (catch Exception e + {:error (str "Error parsing message: " (.getMessage e))})))) ;; ## Read-based tokenization ;;