Skip to content

Commit 6693b39

Browse files
committed
add --link-targets-dir flag to linkchecker
1 parent 984926e commit 6693b39

File tree

1 file changed

+52
-25
lines changed

1 file changed

+52
-25
lines changed

src/tools/linkchecker/main.rs

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717
//! should catch the majority of "broken link" cases.
1818
1919
use std::cell::{Cell, RefCell};
20+
use std::collections::hash_map::Entry;
2021
use std::collections::{HashMap, HashSet};
2122
use std::fs;
22-
use std::io::ErrorKind;
23+
use std::iter::once;
2324
use std::path::{Component, Path, PathBuf};
2425
use std::rc::Rc;
2526
use std::time::Instant;
@@ -112,6 +113,7 @@ macro_rules! t {
112113

113114
/// Command-line arguments of the link checker, as produced by `parse_cli`.
struct Cli {
    /// Documentation directory taken from the first positional argument, made absolute.
    docs: PathBuf,
    /// One entry per `--link-targets-dir` occurrence, each made absolute, in the order given
    /// on the command line. Empty when the flag is not used.
    link_targets_dirs: Vec<PathBuf>,
}
116118

117119
fn main() {
@@ -123,7 +125,11 @@ fn main() {
123125
}
124126
};
125127

126-
let mut checker = Checker { root: cli.docs.clone(), cache: HashMap::new() };
128+
let mut checker = Checker {
129+
root: cli.docs.clone(),
130+
link_targets_dirs: cli.link_targets_dirs,
131+
cache: HashMap::new(),
132+
};
127133
let mut report = Report {
128134
errors: 0,
129135
start: Instant::now(),
@@ -144,19 +150,26 @@ fn main() {
144150
}
145151

146152
fn parse_cli() -> Result<Cli, String> {
147-
/// Converts a command-line path argument into an absolute path.
///
/// Uses `std::path::absolute`, which does not access the filesystem, so the path does not
/// have to exist and symlinks are not resolved.
///
/// # Errors
///
/// Returns a human-readable message naming `arg` when the conversion fails (for example,
/// when `arg` is empty).
fn to_absolute_path(arg: &str) -> Result<PathBuf, String> {
    std::path::absolute(arg).map_err(|e| format!("could not convert to absolute {arg}: {e}"))
}
150156

151157
let mut verbatim = false;
152158
let mut docs = None;
159+
let mut link_targets_dirs = Vec::new();
153160

154161
let mut args = std::env::args().skip(1);
155162
while let Some(arg) = args.next() {
156163
if !verbatim && arg == "--" {
157164
verbatim = true;
158165
} else if !verbatim && (arg == "-h" || arg == "--help") {
159166
usage_and_exit(0)
167+
} else if !verbatim && arg == "--link-targets-dir" {
168+
link_targets_dirs.push(to_absolute_path(
169+
&args.next().ok_or("missing value for --link-targets-dir")?,
170+
)?);
171+
} else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") {
172+
link_targets_dirs.push(to_absolute_path(value)?);
160173
} else if !verbatim && arg.starts_with('-') {
161174
return Err(format!("unknown flag: {arg}"));
162175
} else if docs.is_none() {
@@ -166,16 +179,20 @@ fn parse_cli() -> Result<Cli, String> {
166179
}
167180
}
168181

169-
Ok(Cli { docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)? })
182+
Ok(Cli {
183+
docs: to_absolute_path(&docs.ok_or("missing first positional argument")?)?,
184+
link_targets_dirs,
185+
})
170186
}
171187

172188
/// Prints the one-line usage summary to stderr and terminates the process with exit status
/// `code`. Never returns.
fn usage_and_exit(code: i32) -> ! {
    eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]");
    std::process::exit(code)
}
176192

177193
struct Checker {
178194
root: PathBuf,
195+
link_targets_dirs: Vec<PathBuf>,
179196
cache: Cache,
180197
}
181198

@@ -461,37 +478,34 @@ impl Checker {
461478

462479
/// Load a file from disk, or from the cache if available.
463480
fn load_file(&mut self, file: &Path, report: &mut Report) -> (String, &FileEntry) {
464-
// https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
465-
#[cfg(windows)]
466-
const ERROR_INVALID_NAME: i32 = 123;
467-
468481
let pretty_path =
469482
file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string();
470483

471-
let entry =
472-
self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) {
484+
for base in once(&self.root).chain(self.link_targets_dirs.iter()) {
485+
let entry = self.cache.entry(pretty_path.clone());
486+
if let Entry::Occupied(e) = &entry
487+
&& !matches!(e.get(), FileEntry::Missing)
488+
{
489+
break;
490+
}
491+
492+
let file = base.join(&pretty_path);
493+
entry.insert_entry(match fs::metadata(&file) {
473494
Ok(metadata) if metadata.is_dir() => FileEntry::Dir,
474495
Ok(_) => {
475496
if file.extension().and_then(|s| s.to_str()) != Some("html") {
476497
FileEntry::OtherFile
477498
} else {
478499
report.html_files += 1;
479-
load_html_file(file, report)
500+
load_html_file(&file, report)
480501
}
481502
}
482-
Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing,
483-
Err(e) => {
484-
// If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
485-
// Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
486-
#[cfg(windows)]
487-
if e.raw_os_error() == Some(ERROR_INVALID_NAME)
488-
&& file.as_os_str().to_str().map_or(false, |s| s.contains("::"))
489-
{
490-
return FileEntry::Missing;
491-
}
492-
panic!("unexpected read error for {}: {}", file.display(), e);
493-
}
503+
Err(e) if is_not_found_error(&file, &e) => FileEntry::Missing,
504+
Err(e) => panic!("unexpected read error for {}: {}", file.display(), e),
494505
});
506+
}
507+
508+
let entry = self.cache.get(&pretty_path).unwrap();
495509
(pretty_path, entry)
496510
}
497511
}
@@ -670,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
670684
ids.insert(encoded);
671685
}
672686
}
687+
688+
/// Returns `true` if `error`, obtained while accessing `path`, means that the path does not
/// exist.
///
/// Besides `ErrorKind::NotFound`, this also accepts the Windows `ERROR_INVALID_NAME` OS error
/// when `path` contains `::` (see the comment in the body). On non-Windows targets only
/// `NotFound` is accepted.
fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
    // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
    const WINDOWS_ERROR_INVALID_NAME: i32 = 123;

    error.kind() == std::io::ErrorKind::NotFound
        // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
        // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
        // in `LINKCHECK_EXCEPTIONS`.
        || (cfg!(windows)
            && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME)
            && path.to_str().is_some_and(|s| s.contains("::")))
}

0 commit comments

Comments
 (0)