Skip to content

Commit 6cfb971

Browse files
committed
Add support for redirect link anchors in print page
So that anchors can also be redirected. Signed-off-by: Hollow Man <[email protected]>
1 parent 6e290f9 commit 6cfb971

File tree

2 files changed

+92
-29
lines changed

2 files changed

+92
-29
lines changed

src/renderer/html_handlebars/hbs_renderer.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,12 @@ impl HtmlHandlebars {
5454
let content = ch.content.clone();
5555
let content = utils::render_markdown(&content, ctx.html_config.curly_quotes);
5656

57-
let fixed_content =
58-
utils::render_markdown_with_path(&ch.content, ctx.html_config.curly_quotes, Some(path));
57+
let fixed_content = utils::render_markdown_with_path(
58+
&ch.content,
59+
ctx.html_config.curly_quotes,
60+
Some(path),
61+
ctx.html_config.redirect,
62+
);
5963
if !ctx.is_index && ctx.html_config.print.page_break {
6064
// Add page break between chapters
6165
// See https://developer.mozilla.org/en-US/docs/Web/CSS/break-before and https://developer.mozilla.org/en-US/docs/Web/CSS/page-break-before

src/utils/mod.rs

Lines changed: 86 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use regex::Regex;
99
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
1010

1111
use std::borrow::Cow;
12+
use std::collections::HashMap;
1213
use std::fmt::Write;
1314
use std::path::{Component, Path, PathBuf};
1415

@@ -64,16 +65,15 @@ pub fn id_from_content(content: &str) -> String {
6465
}
6566

6667
/// https://stackoverflow.com/a/68233480
67-
/// Improve the path to try remove and solve .. token. Return the path id
68-
/// by replacing the directory separator with a hyphen.
68+
/// Improve the path by trying to remove and resolve `..` tokens.
6969
///
7070
/// This assumes that `a/b/../c` is `a/c` which might be different from
7171
/// what the OS would have chosen when b is a link. This is OK
7272
/// for broot verb arguments but can't be generally used elsewhere
7373
///
74-
/// This function ensures a given path ending with '/' will
75-
/// end with '-' after normalization.
76-
pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
74+
/// This function ensures a given path ending with '/' will also
75+
/// end with '/' after normalization.
76+
pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
7777
let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/'));
7878
let mut normalized = PathBuf::new();
7979
for component in path.as_ref().components() {
@@ -92,11 +92,19 @@ pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
9292
if ends_with_slash {
9393
normalized.push("");
9494
}
95-
normalized
96-
.to_str()
97-
.unwrap()
98-
.replace("\\", "-")
95+
normalized.to_str().unwrap().replace("\\", "/").to_string()
96+
}
97+
98+
/// Return the normalized path id.
99+
pub fn normalize_path_id(mut path: String) -> String {
100+
path = path
99101
.replace("/", "-")
102+
.replace(".html#", "-")
103+
.replace("#", "-");
104+
if path.ends_with(".html") {
105+
path.replace_range(path.len() - 5.., "");
106+
}
107+
path
100108
}
101109

102110
/// Fix links to the correct location.
@@ -107,7 +115,11 @@ pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
107115
/// book. This is used for the `print.html` page so that links on the print
108116
/// page go to the anchors that have a path id prefix. Normal page rendering
109117
/// sets `path` to None.
110-
fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
118+
fn adjust_links<'a>(
119+
event: Event<'a>,
120+
path: Option<&Path>,
121+
redirects: HashMap<String, String>,
122+
) -> Event<'a> {
111123
lazy_static! {
112124
static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
113125
static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
@@ -136,15 +148,24 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
136148
dest
137149
}
138150

139-
fn fix_a_links<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
151+
fn fix_a_links<'a>(
152+
dest: CowStr<'a>,
153+
path: Option<&Path>,
154+
redirects: HashMap<String, String>,
155+
) -> CowStr<'a> {
140156
if dest.starts_with('#') {
141157
// Fragment-only link.
142158
if let Some(path) = path {
143159
let mut base = path.display().to_string();
144160
if base.ends_with(".md") {
145161
base.replace_range(base.len() - 3.., "");
146162
}
147-
return format!("#{}{}", normalize_path_id(base), dest.replace("#", "-")).into();
163+
return format!(
164+
"#{}{}",
165+
normalize_path_id(normalize_path(base)),
166+
dest.replace("#", "-")
167+
)
168+
.into();
148169
} else {
149170
return dest;
150171
}
@@ -174,16 +195,43 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
174195
fixed_link.push_str(&dest);
175196
};
176197

177-
let path_id = normalize_path_id(&fixed_link)
178-
.replace(".html", "")
179-
.replace("#", "-");
198+
let mut normalized_path = normalize_path(&fixed_link);
199+
180200
// Judge if the html link is inside the book.
181-
if !path_id.contains("..") {
201+
if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
202+
// In `print.html`, print page links would all link to anchors on the print page.
182203
if let Some(_) = path {
183-
// In `print.html`, print page links would all link to anchors on the print page.
204+
// Fix redirect links
205+
let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
206+
for (original, redirect) in &redirects {
207+
if !SCHEME_LINK.is_match(&redirect)
208+
&& normalize_path(original.trim_start_matches('/'))
209+
.eq_ignore_ascii_case(&normalize_path(normalized_path_split[0]))
210+
{
211+
let mut unnormalized_path = String::new();
212+
let base = PathBuf::from(normalized_path_split[0])
213+
.parent()
214+
.expect("path can't be empty")
215+
.to_str()
216+
.expect("utf-8 paths only")
217+
.to_owned();
218+
write!(unnormalized_path, "{}/{}", normalize_path(base), redirect)
219+
.unwrap();
220+
for i in 1..normalized_path_split.len() {
221+
unnormalized_path.push('#');
222+
unnormalized_path.push_str(normalized_path_split[i]);
223+
}
224+
normalized_path = normalize_path(unnormalized_path);
225+
break;
226+
}
227+
}
228+
// Check again after applying redirects to make sure the link still points inside the book.
229+
if normalized_path.starts_with("../") || normalized_path.contains("/../") {
230+
return CowStr::from(normalized_path);
231+
}
184232
let mut fixed_anchor_for_print = String::new();
185233
fixed_anchor_for_print.push_str("#");
186-
fixed_anchor_for_print.push_str(&path_id);
234+
fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
187235
return CowStr::from(fixed_anchor_for_print);
188236
}
189237
}
@@ -193,7 +241,11 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
193241
dest
194242
}
195243

196-
fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
244+
fn fix_html<'a>(
245+
html: CowStr<'a>,
246+
path: Option<&Path>,
247+
redirects: HashMap<String, String>,
248+
) -> CowStr<'a> {
197249
// This is a terrible hack, but should be reasonably reliable. Nobody
198250
// should ever parse a tag with a regex. However, there isn't anything
199251
// in Rust that I know of that is suitable for handling partial html
@@ -216,28 +268,30 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
216268

217269
A_LINK
218270
.replace_all(&temp_html, |caps: &regex::Captures<'_>| {
219-
let fixed = fix_a_links(caps[2].into(), path);
271+
let fixed = fix_a_links(caps[2].into(), path, redirects.clone());
220272
format!("{}{}\"", &caps[1], fixed)
221273
})
222274
.into_owned()
223275
.into()
224276
}
225277

226278
match event {
227-
Event::Start(Tag::Link(link_type, dest, title)) => {
228-
Event::Start(Tag::Link(link_type, fix_a_links(dest, path), title))
229-
}
279+
Event::Start(Tag::Link(link_type, dest, title)) => Event::Start(Tag::Link(
280+
link_type,
281+
fix_a_links(dest, path, redirects),
282+
title,
283+
)),
230284
Event::Start(Tag::Image(link_type, dest, title)) => {
231285
Event::Start(Tag::Image(link_type, fix(dest, path), title))
232286
}
233-
Event::Html(html) => Event::Html(fix_html(html, path)),
287+
Event::Html(html) => Event::Html(fix_html(html, path, redirects)),
234288
_ => event,
235289
}
236290
}
237291

238292
/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
239293
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
240-
render_markdown_with_path(text, curly_quotes, None)
294+
render_markdown_with_path(text, curly_quotes, None, HashMap::new())
241295
}
242296

243297
pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
@@ -252,12 +306,17 @@ pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
252306
Parser::new_ext(text, opts)
253307
}
254308

255-
pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
309+
pub fn render_markdown_with_path(
310+
text: &str,
311+
curly_quotes: bool,
312+
path: Option<&Path>,
313+
redirects: HashMap<String, String>,
314+
) -> String {
256315
let mut s = String::with_capacity(text.len() * 3 / 2);
257316
let p = new_cmark_parser(text, curly_quotes);
258317
let events = p
259318
.map(clean_codeblock_headers)
260-
.map(|event| adjust_links(event, path));
319+
.map(|event| adjust_links(event, path, redirects.clone()));
261320

262321
html::push_html(&mut s, events);
263322
s

0 commit comments

Comments
 (0)