Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

linkchecker: Add support for <base> tag #41992

Merged
merged 2 commits into from
May 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#23121]

[#23121]: https://github.com/rust-lang/rust/issues/23121

See also [`slice_patterns`](slice-patterns.html).
See also [`slice_patterns`](language-features/slice-patterns.html).

------------------------

Expand Down
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/language-features/asm.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,4 @@ constraints, etc.
[llvm-docs]: http://llvm.org/docs/LangRef.html#inline-assembler-expressions

If you need more power and don't mind losing some of the niceties of
`asm!`, check out [global_asm](global_asm.html).
`asm!`, check out [global_asm](language-features/global_asm.html).
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#29641]

[#29641]: https://github.com/rust-lang/rust/issues/29641

See also [`box_syntax`](box-syntax.html)
See also [`box_syntax`](language-features/box-syntax.html)

------------------------

Expand Down
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/language-features/box-syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#27779]

[#27779]: https://github.com/rust-lang/rust/issues/27779

See also [`box_patterns`](box-patterns.html)
See also [`box_patterns`](language-features/box-patterns.html)

------------------------

Expand Down
4 changes: 2 additions & 2 deletions src/doc/unstable-book/src/language-features/global_asm.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,5 @@ usages and placed the larger, single usage in the crate root.

If you don't need quite as much power and flexibility as
`global_asm!` provides, and you don't mind restricting your inline
assembly to `fn` bodies only, you might try the [asm](asm.html)
feature instead.
assembly to `fn` bodies only, you might try the
[asm](language-features/asm.html) feature instead.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ This feature is part of "compiler plugins." It will often be used with the
[`plugin`] and `rustc_private` features as well. For more details, see
their docs.

[`plugin`]: plugin.html
[`plugin`]: language-features/plugin.html

------------------------
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/language-features/plugin.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The tracking issue for this feature is: [#29597]
This feature is part of "compiler plugins." It will often be used with the
[`plugin_registrar`] and `rustc_private` features.

[`plugin_registrar`]: plugin-registrar.html
[`plugin_registrar`]: language-features/plugin-registrar.html

------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ The tracking issue for this feature is: [#23121]

[#23121]: https://github.com/rust-lang/rust/issues/23121

See also [`advanced_slice_patterns`](advanced-slice-patterns.html).
See also
[`advanced_slice_patterns`](language-features/advanced-slice-patterns.html).

------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]

[#33082]: https://github.com/rust-lang/rust/issues/33082

See also [`alloc_system`](alloc-system.html).
See also [`alloc_system`](library-features/alloc-system.html).

------------------------

Expand Down
2 changes: 1 addition & 1 deletion src/doc/unstable-book/src/library-features/alloc-system.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]

[#33082]: https://github.com/rust-lang/rust/issues/33082

See also [`alloc_jemalloc`](alloc-jemalloc.html).
See also [`alloc_jemalloc`](library-features/alloc-jemalloc.html).

------------------------

Expand Down
89 changes: 33 additions & 56 deletions src/tools/linkchecker/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ macro_rules! t {
}

fn main() {
let docs = env::args().nth(1).unwrap();
let docs = env::args_os().nth(1).unwrap();
let docs = env::current_dir().unwrap().join(docs);
let mut errors = false;
walk(&mut HashMap::new(), &docs, &docs, &mut errors);
Expand All @@ -65,15 +65,14 @@ enum Redirect {
struct FileEntry {
source: String,
ids: HashSet<String>,
names: HashSet<String>,
}

type Cache = HashMap<PathBuf, FileEntry>;

impl FileEntry {
fn parse_ids(&mut self, file: &Path, contents: &str, errors: &mut bool) {
if self.ids.is_empty() {
with_attrs_in_source(contents, " id", |fragment, i| {
with_attrs_in_source(contents, " id", |fragment, i, _| {
let frag = fragment.trim_left_matches("#").to_owned();
if !self.ids.insert(frag) {
*errors = true;
Expand All @@ -82,15 +81,6 @@ impl FileEntry {
});
}
}

fn parse_names(&mut self, contents: &str) {
if self.names.is_empty() {
with_attrs_in_source(contents, " name", |fragment, _| {
let frag = fragment.trim_left_matches("#").to_owned();
self.names.insert(frag);
});
}
}
}

fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
Expand All @@ -116,15 +106,8 @@ fn check(cache: &mut Cache,
file: &Path,
errors: &mut bool)
-> Option<PathBuf> {
// ignore js files as they are not prone to errors as the rest of the
// documentation is and they otherwise bring up false positives.
if file.extension().and_then(|s| s.to_str()) == Some("js") {
return None;
}

// ignore handlebars files as they use {{}} to build links, we only
// want to test the generated files
if file.extension().and_then(|s| s.to_str()) == Some("hbs") {
// Ignore non-HTML files.
if file.extension().and_then(|s| s.to_str()) != Some("html") {
return None;
}

Expand All @@ -147,13 +130,7 @@ fn check(cache: &mut Cache,
return None;
}

// mdbook uses the HTML <base> tag to handle links for subdirectories, which
// linkchecker doesn't support
if file.to_str().unwrap().contains("unstable-book") {
return None;
}

let res = load_file(cache, root, PathBuf::from(file), SkipRedirect);
let res = load_file(cache, root, file, SkipRedirect);
let (pretty_file, contents) = match res {
Ok(res) => res,
Err(_) => return None,
Expand All @@ -162,13 +139,10 @@ fn check(cache: &mut Cache,
cache.get_mut(&pretty_file)
.unwrap()
.parse_ids(&pretty_file, &contents, errors);
cache.get_mut(&pretty_file)
.unwrap()
.parse_names(&contents);
}

// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
with_attrs_in_source(&contents, " href", |url, i| {
with_attrs_in_source(&contents, " href", |url, i, base| {
// Ignore external URLs
if url.starts_with("http:") || url.starts_with("https:") ||
url.starts_with("javascript:") || url.starts_with("ftp:") ||
Expand All @@ -184,9 +158,9 @@ fn check(cache: &mut Cache,
// Once we've plucked out the URL, parse it using our base url and
// then try to extract a file path.
let mut path = file.to_path_buf();
if !url.is_empty() {
if !base.is_empty() || !url.is_empty() {
path.pop();
for part in Path::new(url).components() {
for part in Path::new(base).join(url).components() {
match part {
Component::Prefix(_) |
Component::RootDir => panic!(),
Expand All @@ -197,13 +171,6 @@ fn check(cache: &mut Cache,
}
}

if let Some(extension) = path.extension() {
// don't check these files
if extension == "png" {
return;
}
}

// Alright, if we've found a file name then this file had better
// exist! If it doesn't then we register and print an error.
if path.exists() {
Expand All @@ -218,11 +185,17 @@ fn check(cache: &mut Cache,
pretty_path.display());
return;
}
let res = load_file(cache, root, path.clone(), FromRedirect(false));
if let Some(extension) = path.extension() {
// Ignore non-HTML files.
if extension != "html" {
return;
}
}
let res = load_file(cache, root, &path, FromRedirect(false));
let (pretty_path, contents) = match res {
Ok(res) => res,
Err(LoadError::IOError(err)) => {
panic!(format!("error loading {}: {}", path.display(), err));
panic!("error loading {}: {}", path.display(), err);
}
Err(LoadError::BrokenRedirect(target, _)) => {
*errors = true;
Expand All @@ -245,11 +218,10 @@ fn check(cache: &mut Cache,

let entry = &mut cache.get_mut(&pretty_path).unwrap();
entry.parse_ids(&pretty_path, &contents, errors);
entry.parse_names(&contents);

if !(entry.ids.contains(*fragment) || entry.names.contains(*fragment)) {
if !entry.ids.contains(*fragment) {
*errors = true;
print!("{}:{}: broken link fragment ",
print!("{}:{}: broken link fragment ",
pretty_file.display(),
i + 1);
println!("`#{}` pointing to `{}`", fragment, pretty_path.display());
Expand All @@ -267,7 +239,7 @@ fn check(cache: &mut Cache,

fn load_file(cache: &mut Cache,
root: &Path,
mut file: PathBuf,
file: &Path,
redirect: Redirect)
-> Result<(PathBuf, String), LoadError> {
let mut contents = String::new();
Expand All @@ -279,9 +251,9 @@ fn load_file(cache: &mut Cache,
None
}
Entry::Vacant(entry) => {
let mut fp = File::open(file.clone()).map_err(|err| {
let mut fp = File::open(file).map_err(|err| {
if let FromRedirect(true) = redirect {
LoadError::BrokenRedirect(file.clone(), err)
LoadError::BrokenRedirect(file.to_path_buf(), err)
} else {
LoadError::IOError(err)
}
Expand All @@ -297,17 +269,14 @@ fn load_file(cache: &mut Cache,
entry.insert(FileEntry {
source: contents.clone(),
ids: HashSet::new(),
names: HashSet::new(),
});
}
maybe
}
};
file.pop();
match maybe_redirect.map(|url| file.join(url)) {
match maybe_redirect.map(|url| file.parent().unwrap().join(url)) {
Some(redirect_file) => {
let path = PathBuf::from(redirect_file);
load_file(cache, root, path, FromRedirect(true))
load_file(cache, root, &redirect_file, FromRedirect(true))
}
None => Ok((pretty_file, contents)),
}
Expand All @@ -329,10 +298,14 @@ fn maybe_redirect(source: &str) -> Option<String> {
})
}

fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f: F) {
fn with_attrs_in_source<F: FnMut(&str, usize, &str)>(contents: &str, attr: &str, mut f: F) {
let mut base = "";
for (i, mut line) in contents.lines().enumerate() {
while let Some(j) = line.find(attr) {
let rest = &line[j + attr.len()..];
// The base tag should always be the first link in the document so
// we can get away with using one pass.
let is_base = line[..j].ends_with("<base");
line = rest;
let pos_equals = match rest.find("=") {
Some(i) => i,
Expand All @@ -358,7 +331,11 @@ fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f
Some(i) => &rest[..i],
None => continue,
};
f(url, i)
if is_base {
base = url;
continue;
}
f(url, i, base)
}
}
}