diff --git a/Cargo.lock b/Cargo.lock index 8ac6dfc2d4..8410a40aca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1367,6 +1367,7 @@ dependencies = [ "lazy_static", "mockito", "reqwest", + "utils", ] [[package]] @@ -3264,6 +3265,7 @@ dependencies = [ "filetime", "minify-html", "percent-encoding", + "regex", "serde", "slug", "tempfile", diff --git a/components/config/src/config/link_checker.rs b/components/config/src/config/link_checker.rs index 9d597d498b..2e81083bbd 100644 --- a/components/config/src/config/link_checker.rs +++ b/components/config/src/config/link_checker.rs @@ -1,6 +1,6 @@ use serde_derive::{Deserialize, Serialize}; -#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(default)] pub struct LinkChecker { /// Skip link checking for these URL prefixes @@ -9,8 +9,3 @@ pub struct LinkChecker { pub skip_anchor_prefixes: Vec, } -impl Default for LinkChecker { - fn default() -> LinkChecker { - LinkChecker { skip_prefixes: Vec::new(), skip_anchor_prefixes: Vec::new() } - } -} diff --git a/components/library/src/content/page.rs b/components/library/src/content/page.rs index 64d9a82c52..26c5340846 100644 --- a/components/library/src/content/page.rs +++ b/components/library/src/content/page.rs @@ -20,6 +20,7 @@ use crate::content::file_info::FileInfo; use crate::content::ser::SerializingPage; use crate::content::{find_related_assets, has_anchor}; use utils::fs::read_file; +use utils::links::has_anchor_id; lazy_static! { // Based on https://regex101.com/r/H2n38Z/1/tests @@ -300,6 +301,10 @@ impl Page { has_anchor(&self.toc, anchor) } + pub fn has_anchor_id(&self, id: &str) -> bool { + has_anchor_id(&self.content, id) + } + pub fn to_serialized<'a>(&'a self, library: &'a Library) -> SerializingPage<'a> { SerializingPage::from_page(self, library) } diff --git a/components/link_checker/Cargo.toml b/components/link_checker/Cargo.toml index 15de74d71c..4b4bca42d4 100644 --- a/components/link_checker/Cargo.toml +++ b/components/link_checker/Cargo.toml @@ -9,6 +9,7 @@ lazy_static = "1" config = { path = "../config" } errors = { path = "../errors" } +utils = { path = "../utils" } [dependencies.reqwest] version = "0.11" diff --git a/components/link_checker/src/lib.rs b/components/link_checker/src/lib.rs index 1d56db2c61..48502d7b3a 100644 --- a/components/link_checker/src/lib.rs +++ b/components/link_checker/src/lib.rs @@ -7,6 +7,7 @@ use config::LinkChecker; use std::collections::HashMap; use std::result; use std::sync::{Arc, RwLock}; +use utils::links::has_anchor_id; pub type Result = result::Result; @@ -104,22 +105,9 @@ fn has_anchor(url: &str) -> bool { fn check_page_for_anchor(url: &str, body: String) -> errors::Result<()> { let index = url.find('#').unwrap(); let anchor = url.get(index + 1..).unwrap(); - let checks = [ - format!(" id={}", anchor), - format!(" ID={}", anchor), - format!(" id='{}'", anchor), - format!(" ID='{}'", anchor), - format!(r#" id="{}""#, anchor), - format!(r#" ID="{}""#, anchor), - format!(" name={}", anchor), - format!(" NAME={}", anchor), - format!(" name='{}'", anchor), - format!(" NAME='{}'", anchor), - format!(r#" name="{}""#, anchor), - format!(r#" NAME="{}""#, anchor), - ]; - - if checks.iter().any(|check| body[..].contains(&check[..])) { + + + if has_anchor_id(&body, &anchor){ Ok(()) } else { Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor))) @@ -338,7 +326,7 @@ mod tests { #[test] fn skip_anchor_prefixes() { let ignore_url = format!("{}{}", mockito::server_url(), "/ignore/"); - let config = LinkChecker { skip_prefixes: vec![], skip_anchor_prefixes: vec![ignore_url] }; + let config = LinkChecker { skip_anchor_prefixes: vec![ignore_url], ..Default::default() }; let _m1 = mock("GET", "/ignore/i30hobj1cy") .with_header("Content-Type", "text/html") diff --git a/components/site/src/link_checking.rs b/components/site/src/link_checking.rs index ae2ecb6d31..b42b2d1a62 100644 --- a/components/site/src/link_checking.rs +++ b/components/site/src/link_checking.rs @@ -63,7 +63,8 @@ pub fn check_internal_links_with_anchors(site: &Site) -> Result<()> { let page = library .get_page(&full_path) .expect("Couldn't find section in check_internal_links_with_anchors"); - !page.has_anchor(anchor) + + !(page.has_anchor(anchor)||page.has_anchor_id(anchor)) } }); diff --git a/components/utils/Cargo.toml b/components/utils/Cargo.toml index 17ee05ae31..4773a441be 100644 --- a/components/utils/Cargo.toml +++ b/components/utils/Cargo.toml @@ -9,6 +9,7 @@ include = ["src/**/*"] tera = "1" unicode-segmentation = "1.2" walkdir = "2" +regex="1" toml = "0.5" serde = { version = "1.0", features = ["derive"] } slug = "0.1" diff --git a/components/utils/src/lib.rs b/components/utils/src/lib.rs index eac2325dc3..2a8162d0e1 100644 --- a/components/utils/src/lib.rs +++ b/components/utils/src/lib.rs @@ -1,5 +1,6 @@ pub mod de; pub mod fs; +pub mod links; pub mod minify; pub mod net; pub mod site; diff --git a/components/utils/src/links.rs b/components/utils/src/links.rs new file mode 100644 index 0000000000..3e18b2ada0 --- /dev/null +++ b/components/utils/src/links.rs @@ -0,0 +1,47 @@ +use regex::Regex; + + +pub fn has_anchor_id(content: &str, anchor: &str) -> bool { + let checks = anchor_id_checks(anchor); + checks.is_match(content) +} + +fn anchor_id_checks(anchor:&str) -> Regex { + Regex::new( + &format!(r#" (?i)(id|name) *= *("|')*{}("|'| |>)+"#, anchor) + ).unwrap() +} + + +#[cfg(test)] +mod tests{ + use super::anchor_id_checks; + + fn check(anchor:&str, content:&str) -> bool { + anchor_id_checks(anchor).is_match(content) + } + + #[test] + fn matchers () { + let m = |content| {check("fred", content)}; + + // Canonical match/non match + assert!(m(r#""#)); + assert!(m(r#""#)); + assert!(!m(r#""#)); + + // Whitespace variants + assert!(m(r#""#)); + assert!(m(r#""#)); + assert!(m(r#""#)); + assert!(m(r#""#)); + + // Quote variants + assert!(m(r#""#)); + assert!(m(r#""#)); + + // Case variants + assert!(m(r#""#)); + assert!(m(r#""#)); + } +}