@@ -9,6 +9,7 @@ use regex::Regex;
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};

use std::borrow::Cow;
+ use std::collections::HashMap;
use std::fmt::Write;
use std::path::{Component, Path, PathBuf};
@@ -64,16 +65,15 @@ pub fn id_from_content(content: &str) -> String {
}

/// https://stackoverflow.com/a/68233480
- /// Improve the path to try remove and solve .. token. Return the path id
- /// by replacing the directory separator with a hyphen.
+ /// Improve the path to try remove and solve .. token.
///
/// This assumes that `a/b/../c` is `a/c` which might be different from
/// what the OS would have chosen when b is a link. This is OK
/// for broot verb arguments but can't be generally used elsewhere
///
- /// This function ensures a given path ending with '/' will
- /// end with '-' after normalization.
- pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
+ /// This function ensures a given path ending with '/' will also
+ /// end with '/' after normalization.
+ pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
    let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/'));
    let mut normalized = PathBuf::new();
    for component in path.as_ref().components() {
@@ -92,11 +92,19 @@ pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
    if ends_with_slash {
        normalized.push("");
    }
-     normalized
-         .to_str()
-         .unwrap()
-         .replace("\\", "-")
+     normalized.to_str().unwrap().replace("\\", "/").to_string()
+ }
+
+ /// Return the normalized path id.
+ pub fn normalize_path_id(mut path: String) -> String {
+     path = path
        .replace("/", "-")
+         .replace(".html#", "-")
+         .replace("#", "-");
+     if path.ends_with(".html") {
+         path.replace_range(path.len() - 5.., "");
+     }
+     path
}

/// Fix links to the correct location.
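
To make the intended behaviour of the two helpers above concrete, here is a minimal sketch written as if it sat in this module's test submodule (so both functions are in scope); the example paths are invented:

#[test]
fn normalize_examples() {
    // ".." segments are resolved textually and a trailing '/' survives normalization.
    assert_eq!(normalize_path("a/b/../c"), "a/c");
    assert_eq!(normalize_path("a/b/"), "a/b/");
    // The path id flattens separators and anchors into '-' and drops a trailing ".html".
    assert_eq!(normalize_path_id("foo/bar.html#baz".to_string()), "foo-bar-baz");
    assert_eq!(normalize_path_id("foo/bar.html".to_string()), "foo-bar");
}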
@@ -107,7 +115,11 @@ pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
/// book. This is used for the `print.html` page so that links on the print
/// page go to the anchors that have a path id prefix. Normal page rendering
/// sets `path` to None.
- fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
+ fn adjust_links<'a>(
+     event: Event<'a>,
+     path: Option<&Path>,
+     redirects: HashMap<String, String>,
+ ) -> Event<'a> {
    lazy_static! {
        static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
        static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
@@ -136,15 +148,24 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
        dest
    }

-     fn fix_a_links<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+     fn fix_a_links<'a>(
+         dest: CowStr<'a>,
+         path: Option<&Path>,
+         redirects: HashMap<String, String>,
+     ) -> CowStr<'a> {
        if dest.starts_with('#') {
            // Fragment-only link.
            if let Some(path) = path {
                let mut base = path.display().to_string();
                if base.ends_with(".md") {
                    base.replace_range(base.len() - 3.., "");
                }
-                 return format!("#{}{}", normalize_path_id(base), dest.replace("#", "-")).into();
+                 return format!(
+                     "#{}{}",
+                     normalize_path_id(normalize_path(base)),
+                     dest.replace("#", "-")
+                 )
+                 .into();
            } else {
                return dest;
            }
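
As a rough illustration of the fragment-only branch above (the page path and anchor are invented, and the snippet assumes it sits next to the helpers in a test):

#[test]
fn fragment_only_print_anchor() {
    // A "#install" link inside guide/setup.md becomes "#guide-setup-install" on print.html.
    let base = String::from("guide/setup"); // the page path with ".md" already stripped
    let dest = "#install";
    let rewritten = format!(
        "#{}{}",
        normalize_path_id(normalize_path(base)),
        dest.replace("#", "-")
    );
    assert_eq!(rewritten, "#guide-setup-install");
}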
@@ -174,16 +195,43 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
            fixed_link.push_str(&dest);
        };

-         let path_id = normalize_path_id(&fixed_link)
-             .replace(".html", "")
-             .replace("#", "-");
+         let mut normalized_path = normalize_path(&fixed_link);
+
        // Judge if the html link is inside the book.
-         if !path_id.contains("..") {
+         if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
+             // In `print.html`, print page links would all link to anchors on the print page.
            if let Some(_) = path {
-                 // In `print.html`, print page links would all link to anchors on the print page.
+                 // Fix redirect links
+                 let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
+                 for (original, redirect) in &redirects {
+                     if !SCHEME_LINK.is_match(&redirect)
+                         && normalize_path(original.trim_start_matches('/'))
+                             .eq_ignore_ascii_case(&normalize_path(normalized_path_split[0]))
+                     {
+                         let mut unnormalized_path = String::new();
+                         let base = PathBuf::from(normalized_path_split[0])
+                             .parent()
+                             .expect("path can't be empty")
+                             .to_str()
+                             .expect("utf-8 paths only")
+                             .to_owned();
+                         write!(unnormalized_path, "{}/{}", normalize_path(base), redirect)
+                             .unwrap();
+                         for i in 1..normalized_path_split.len() {
+                             unnormalized_path.push('#');
+                             unnormalized_path.push_str(normalized_path_split[i]);
+                         }
+                         normalized_path = normalize_path(unnormalized_path);
+                         break;
+                     }
+                 }
+                 // Check again to make sure anchors are the html links inside the book.
+                 if normalized_path.starts_with("../") || normalized_path.contains("/../") {
+                     return CowStr::from(normalized_path);
+                 }
                let mut fixed_anchor_for_print = String::new();
                fixed_anchor_for_print.push_str("#");
-                 fixed_anchor_for_print.push_str(&path_id);
+                 fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
                return CowStr::from(fixed_anchor_for_print);
            }
        }
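
The redirect handling above can be hard to follow, so here is a rough trace using only the two public helpers; the redirect entry and link target are invented, and the snippet assumes it lives in this module's tests and that ".." resolution works as the doc comment states:

#[test]
fn redirected_link_becomes_print_anchor() {
    use std::collections::HashMap;

    // Redirect "/old/intro.html" -> "../new/intro.html"; a chapter links to "old/intro.html#setup".
    let mut redirects = HashMap::new();
    redirects.insert("/old/intro.html".to_string(), "../new/intro.html".to_string());

    // The loop joins the old file's directory with the redirect target...
    let resolved = format!("old/{}", redirects["/old/intro.html"]); // "old/../new/intro.html"
    assert_eq!(normalize_path(&resolved), "new/intro.html");
    // ...and the print page then turns the redirected target plus anchor into its anchor id.
    assert_eq!(
        normalize_path_id("new/intro.html#setup".to_string()),
        "new-intro-setup"
    );
}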
@@ -193,7 +241,11 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
        dest
    }

-     fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+     fn fix_html<'a>(
+         html: CowStr<'a>,
+         path: Option<&Path>,
+         redirects: HashMap<String, String>,
+     ) -> CowStr<'a> {
        // This is a terrible hack, but should be reasonably reliable. Nobody
        // should ever parse a tag with a regex. However, there isn't anything
        // in Rust that I know of that is suitable for handling partial html
@@ -216,28 +268,30 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {

        A_LINK
            .replace_all(&temp_html, |caps: &regex::Captures<'_>| {
-                 let fixed = fix_a_links(caps[2].into(), path);
+                 let fixed = fix_a_links(caps[2].into(), path, redirects.clone());
                format!("{}{}\"", &caps[1], fixed)
            })
            .into_owned()
            .into()
    }

    match event {
-         Event::Start(Tag::Link(link_type, dest, title)) => {
-             Event::Start(Tag::Link(link_type, fix_a_links(dest, path), title))
-         }
+         Event::Start(Tag::Link(link_type, dest, title)) => Event::Start(Tag::Link(
+             link_type,
+             fix_a_links(dest, path, redirects),
+             title,
+         )),
        Event::Start(Tag::Image(link_type, dest, title)) => {
            Event::Start(Tag::Image(link_type, fix(dest, path), title))
        }
-         Event::Html(html) => Event::Html(fix_html(html, path)),
+         Event::Html(html) => Event::Html(fix_html(html, path, redirects)),
        _ => event,
    }
}

/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
-     render_markdown_with_path(text, curly_quotes, None)
+     render_markdown_with_path(text, curly_quotes, None, HashMap::new())
}

pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
@@ -252,12 +306,17 @@ pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
    Parser::new_ext(text, opts)
}

- pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
+ pub fn render_markdown_with_path(
+     text: &str,
+     curly_quotes: bool,
+     path: Option<&Path>,
+     redirects: HashMap<String, String>,
+ ) -> String {
    let mut s = String::with_capacity(text.len() * 3 / 2);
    let p = new_cmark_parser(text, curly_quotes);
    let events = p
        .map(clean_codeblock_headers)
-         .map(|event| adjust_links(event, path));
+         .map(|event| adjust_links(event, path, redirects.clone()));

    html::push_html(&mut s, events);
    s
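
A hypothetical call site for the widened signature (the chapter path, markdown snippet, and redirect entry are invented; only the parameter shape is being demonstrated):

#[test]
fn render_with_redirects_signature() {
    use std::collections::HashMap;
    use std::path::Path;

    let mut redirects = HashMap::new();
    redirects.insert("/old/intro.html".to_string(), "../new/intro.html".to_string());

    // Normal rendering: no page path, empty redirect map (what `render_markdown` now does).
    let _page = render_markdown_with_path("[Intro](old/intro.md)", false, None, HashMap::new());
    // Print rendering: pass the chapter's source path so links become print-page anchors.
    let _print = render_markdown_with_path(
        "[Intro](old/intro.md)",
        false,
        Some(Path::new("appendix/print-demo.md")),
        redirects,
    );
}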