@@ -9,6 +9,7 @@ use regex::Regex;
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};

use std::borrow::Cow;
+ use std::collections::HashMap;
use std::fmt::Write;
use std::path::{Component, Path, PathBuf};
@@ -64,16 +65,15 @@ pub fn id_from_content(content: &str) -> String {
}

/// https://stackoverflow.com/a/68233480
- /// Improve the path to try remove and solve .. token. Return the path id
- /// by replacing the directory separator with a hyphen.
+ /// Improve the path to try remove and solve .. token.
///
/// This assumes that `a/b/../c` is `a/c` which might be different from
/// what the OS would have chosen when b is a link. This is OK
/// for broot verb arguments but can't be generally used elsewhere
///
- /// This function ensures a given path ending with '/' will
- /// end with '-' after normalization.
- pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
+ /// This function ensures a given path ending with '/' will also
+ /// end with '/' after normalization.
+ pub fn normalize_path<P: AsRef<Path>>(path: P) -> String {
    let ends_with_slash = path.as_ref().to_str().map_or(false, |s| s.ends_with('/'));
    let mut normalized = PathBuf::new();
    for component in path.as_ref().components() {
@@ -92,11 +92,19 @@ pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
    if ends_with_slash {
        normalized.push("");
    }
-     normalized
-         .to_str()
-         .unwrap()
-         .replace("\\", "-")
+     normalized.to_str().unwrap().replace("\\", "/").to_string()
+ }
+
+ /// Return the normalized path id.
+ pub fn normalize_path_id(mut path: String) -> String {
+     path = path
        .replace("/", "-")
+         .replace(".html#", "-")
+         .replace("#", "-");
+     if path.ends_with(".html") {
+         path.replace_range(path.len() - 5.., "");
+     }
+     path
}

/// Fix links to the correct location.
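
To make the intended behaviour of the two helpers above concrete, here is a minimal sketch written as if it sat in this module's test submodule (so both functions are in scope); the example paths are invented:

#[test]
fn normalize_examples() {
    // ".." segments are resolved textually and a trailing '/' survives normalization.
    assert_eq!(normalize_path("a/b/../c"), "a/c");
    assert_eq!(normalize_path("a/b/"), "a/b/");
    // The path id flattens separators and anchors into '-' and drops a trailing ".html".
    assert_eq!(normalize_path_id("foo/bar.html#baz".to_string()), "foo-bar-baz");
    assert_eq!(normalize_path_id("foo/bar.html".to_string()), "foo-bar");
}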
@@ -107,7 +115,11 @@ pub fn normalize_path_id<P: AsRef<Path>>(path: P) -> String {
/// book. This is used for the `print.html` page so that links on the print
/// page go to the anchors that have a path id prefix. Normal page rendering
/// sets `path` to None.
- fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
+ fn adjust_links<'a>(
+     event: Event<'a>,
+     path: Option<&Path>,
+     redirects: HashMap<String, String>,
+ ) -> Event<'a> {
    lazy_static! {
        static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap();
        static ref MD_LINK: Regex = Regex::new(r"(?P<link>.*)\.md(?P<anchor>#.*)?").unwrap();
@@ -136,15 +148,24 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
        dest
    }

-     fn fix_a_links<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+     fn fix_a_links<'a>(
+         dest: CowStr<'a>,
+         path: Option<&Path>,
+         redirects: HashMap<String, String>,
+     ) -> CowStr<'a> {
        if dest.starts_with('#') {
            // Fragment-only link.
            if let Some(path) = path {
                let mut base = path.display().to_string();
                if base.ends_with(".md") {
                    base.replace_range(base.len() - 3.., "");
                }
-                 return format!("#{}{}", normalize_path_id(base), dest.replace("#", "-")).into();
+                 return format!(
+                     "#{}{}",
+                     normalize_path_id(normalize_path(base)),
+                     dest.replace("#", "-")
+                 )
+                 .into();
            } else {
                return dest;
            }
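
As a rough illustration of the fragment-only branch above (the page path and anchor are invented, and the snippet assumes it sits next to the helpers in a test):

#[test]
fn fragment_only_print_anchor() {
    // A "#install" link inside guide/setup.md becomes "#guide-setup-install" on print.html.
    let base = String::from("guide/setup"); // the page path with ".md" already stripped
    let dest = "#install";
    let rewritten = format!(
        "#{}{}",
        normalize_path_id(normalize_path(base)),
        dest.replace("#", "-")
    );
    assert_eq!(rewritten, "#guide-setup-install");
}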
@@ -174,16 +195,43 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
            fixed_link.push_str(&dest);
        };

-         let path_id = normalize_path_id(&fixed_link)
-             .replace(".html", "")
-             .replace("#", "-");
+         let mut normalized_path = normalize_path(&fixed_link);
+
        // Judge if the html link is inside the book.
-         if !path_id.contains("..") {
+         if !normalized_path.starts_with("../") && !normalized_path.contains("/../") {
+             // In `print.html`, print page links would all link to anchors on the print page.
            if let Some(_) = path {
-                 // In `print.html`, print page links would all link to anchors on the print page.
+                 // Fix redirect links
+                 let normalized_path_split: Vec<&str> = normalized_path.split('#').collect();
+                 for (original, redirect) in &redirects {
+                     if !SCHEME_LINK.is_match(&redirect)
+                         && normalize_path(original.trim_start_matches('/'))
+                             .eq_ignore_ascii_case(&normalize_path(normalized_path_split[0]))
+                     {
+                         let mut unnormalized_path = String::new();
+                         let base = PathBuf::from(normalized_path_split[0])
+                             .parent()
+                             .expect("path can't be empty")
+                             .to_str()
+                             .expect("utf-8 paths only")
+                             .to_owned();
+                         write!(unnormalized_path, "{}/{}", normalize_path(base), redirect)
+                             .unwrap();
+                         for i in 1..normalized_path_split.len() {
+                             unnormalized_path.push('#');
+                             unnormalized_path.push_str(normalized_path_split[i]);
+                         }
+                         normalized_path = normalize_path(unnormalized_path);
+                         break;
+                     }
+                 }
+                 // Check again to make sure anchors are the html links inside the book.
+                 if normalized_path.starts_with("../") || normalized_path.contains("/../") {
+                     return CowStr::from(normalized_path);
+                 }
                let mut fixed_anchor_for_print = String::new();
                fixed_anchor_for_print.push_str("#");
-                 fixed_anchor_for_print.push_str(&path_id);
+                 fixed_anchor_for_print.push_str(&normalize_path_id(normalized_path));
                return CowStr::from(fixed_anchor_for_print);
            }
        }
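
The redirect handling above can be hard to follow, so here is a rough trace using only the two public helpers; the redirect entry and link target are invented, and the snippet assumes it lives in this module's tests and that ".." resolution works as the doc comment states:

#[test]
fn redirected_link_becomes_print_anchor() {
    use std::collections::HashMap;

    // Redirect "/old/intro.html" -> "../new/intro.html"; a chapter links to "old/intro.html#setup".
    let mut redirects = HashMap::new();
    redirects.insert("/old/intro.html".to_string(), "../new/intro.html".to_string());

    // The loop joins the old file's directory with the redirect target...
    let resolved = format!("old/{}", redirects["/old/intro.html"]); // "old/../new/intro.html"
    assert_eq!(normalize_path(&resolved), "new/intro.html");
    // ...and the print page then turns the redirected target plus anchor into its anchor id.
    assert_eq!(
        normalize_path_id("new/intro.html#setup".to_string()),
        "new-intro-setup"
    );
}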
@@ -193,7 +241,11 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
        dest
    }

-     fn fix_html<'a>(html: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> {
+     fn fix_html<'a>(
+         html: CowStr<'a>,
+         path: Option<&Path>,
+         redirects: HashMap<String, String>,
+     ) -> CowStr<'a> {
        // This is a terrible hack, but should be reasonably reliable. Nobody
        // should ever parse a tag with a regex. However, there isn't anything
        // in Rust that I know of that is suitable for handling partial html
@@ -216,28 +268,30 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {

        A_LINK
            .replace_all(&temp_html, |caps: &regex::Captures<'_>| {
-                 let fixed = fix_a_links(caps[2].into(), path);
+                 let fixed = fix_a_links(caps[2].into(), path, redirects.clone());
                format!("{}{}\"", &caps[1], fixed)
            })
            .into_owned()
            .into()
    }

    match event {
-         Event::Start(Tag::Link(link_type, dest, title)) => {
-             Event::Start(Tag::Link(link_type, fix_a_links(dest, path), title))
-         }
+         Event::Start(Tag::Link(link_type, dest, title)) => Event::Start(Tag::Link(
+             link_type,
+             fix_a_links(dest, path, redirects),
+             title,
+         )),
        Event::Start(Tag::Image(link_type, dest, title)) => {
            Event::Start(Tag::Image(link_type, fix(dest, path), title))
        }
-         Event::Html(html) => Event::Html(fix_html(html, path)),
+         Event::Html(html) => Event::Html(fix_html(html, path, redirects)),
        _ => event,
    }
}

/// Wrapper around the pulldown-cmark parser for rendering markdown to HTML.
pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
-     render_markdown_with_path(text, curly_quotes, None)
+     render_markdown_with_path(text, curly_quotes, None, HashMap::new())
}

pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
@@ -252,12 +306,17 @@ pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
    Parser::new_ext(text, opts)
}

- pub fn render_markdown_with_path(text: &str, curly_quotes: bool, path: Option<&Path>) -> String {
+ pub fn render_markdown_with_path(
+     text: &str,
+     curly_quotes: bool,
+     path: Option<&Path>,
+     redirects: HashMap<String, String>,
+ ) -> String {
    let mut s = String::with_capacity(text.len() * 3 / 2);
    let p = new_cmark_parser(text, curly_quotes);
    let events = p
        .map(clean_codeblock_headers)
-         .map(|event| adjust_links(event, path));
+         .map(|event| adjust_links(event, path, redirects.clone()));

    html::push_html(&mut s, events);
    s
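
A hypothetical call site for the widened signature (the chapter path, markdown snippet, and redirect entry are invented; only the parameter shape is being demonstrated):

#[test]
fn render_with_redirects_signature() {
    use std::collections::HashMap;
    use std::path::Path;

    let mut redirects = HashMap::new();
    redirects.insert("/old/intro.html".to_string(), "../new/intro.html".to_string());

    // Normal rendering: no page path, empty redirect map (what `render_markdown` now does).
    let _page = render_markdown_with_path("[Intro](old/intro.md)", false, None, HashMap::new());
    // Print rendering: pass the chapter's source path so links become print-page anchors.
    let _print = render_markdown_with_path(
        "[Intro](old/intro.md)",
        false,
        Some(Path::new("appendix/print-demo.md")),
        redirects,
    );
}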