Skip to content

Commit

Permalink
Merge pull request #695 from epage/ignore
Browse files Browse the repository at this point in the history
feat(config): Custom ignores
  • Loading branch information
epage authored Mar 22, 2023
2 parents 0d46368 + ac46a6b commit 5253e55
Show file tree
Hide file tree
Showing 10 changed files with 154 additions and 6 deletions.
1 change: 1 addition & 0 deletions crates/typos-cli/src/bin/typos-cli/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ impl FileArgs {
locale: self.locale,
..Default::default()
}),
extend_ignore_re: Default::default(),
}
}

Expand Down
28 changes: 27 additions & 1 deletion crates/typos-cli/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ impl GlobEngineConfig {
}
}

#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
//#[serde(deny_unknown_fields)] // Doesn't work with `flatten`
#[serde(default)]
#[serde(rename_all = "kebab-case")]
Expand All @@ -283,6 +283,8 @@ pub struct EngineConfig {
pub tokenizer: Option<TokenizerConfig>,
#[serde(flatten)]
pub dict: Option<DictConfig>,
#[serde(with = "serde_regex")]
pub extend_ignore_re: Vec<regex::Regex>,
}

impl EngineConfig {
Expand All @@ -298,6 +300,7 @@ impl EngineConfig {
.unwrap_or_else(TokenizerConfig::from_defaults),
),
dict: Some(empty.dict.unwrap_or_else(DictConfig::from_defaults)),
extend_ignore_re: Default::default(),
}
}

Expand Down Expand Up @@ -327,6 +330,8 @@ impl EngineConfig {
let mut dict = Some(dict);
std::mem::swap(&mut dict, &mut self.dict);
}
self.extend_ignore_re
.extend(source.extend_ignore_re.iter().cloned());
}

pub fn binary(&self) -> bool {
Expand All @@ -340,8 +345,29 @@ impl EngineConfig {
/// Whether spelling in file contents is verified; an unset value counts as `true`.
pub fn check_file(&self) -> bool {
    let configured = self.check_file;
    configured.unwrap_or(true)
}

/// Returns a boxed iterator over the `extend_ignore_re` patterns, in stored order.
pub fn extend_ignore_re(&self) -> Box<dyn Iterator<Item = &regex::Regex> + '_> {
    let patterns = self.extend_ignore_re.iter();
    Box::new(patterns)
}
}

impl PartialEq for EngineConfig {
    /// Field-by-field equality.
    ///
    /// The `extend_ignore_re` patterns are compared pairwise, in order, by their
    /// source text (`as_str`) rather than by the compiled regex values.
    fn eq(&self, rhs: &Self) -> bool {
        // Cheap scalar/option fields first; bail out on the first difference.
        if self.binary != rhs.binary
            || self.check_filename != rhs.check_filename
            || self.check_file != rhs.check_file
            || self.tokenizer != rhs.tokenizer
            || self.dict != rhs.dict
        {
            return false;
        }

        let ours = self.extend_ignore_re.iter().map(|re| re.as_str());
        let theirs = rhs.extend_ignore_re.iter().map(|re| re.as_str());
        ours.eq(theirs)
    }
}

// `Eq` is declared by hand to accompany the manual `PartialEq` impl for this type
// (neither trait is derived on `EngineConfig`).
impl Eq for EngineConfig {}

#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(default)]
Expand Down
62 changes: 62 additions & 0 deletions crates/typos-cli/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,14 @@ impl FileChecker for Typos {
reporter.report(msg.into())?;
} else {
let mut accum_line_num = AccumulateLineNum::new();
let mut ignores: Option<Ignores> = None;
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(typo.span())
{
continue;
}
let line_num = accum_line_num.line_num(&buffer, typo.byte_offset);
let (line, line_offset) = extract_line(&buffer, typo.byte_offset);
let msg = report::Typo {
Expand Down Expand Up @@ -86,7 +93,14 @@ impl FileChecker for FixTypos {
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
let mut ignores: Option<Ignores> = None;
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(typo.span())
{
continue;
}
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
Expand Down Expand Up @@ -163,7 +177,14 @@ impl FileChecker for DiffTypos {
} else {
let mut fixes = Vec::new();
let mut accum_line_num = AccumulateLineNum::new();
let mut ignores: Option<Ignores> = None;
for typo in typos::check_bytes(&buffer, policy.tokenizer, policy.dict) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(typo.span())
{
continue;
}
if is_fixable(&typo) {
fixes.push(typo.into_owned());
} else {
Expand Down Expand Up @@ -276,7 +297,14 @@ impl FileChecker for Identifiers {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
let mut ignores: Option<Ignores> = None;
for word in policy.tokenizer.parse_bytes(&buffer) {
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(word.span())
{
continue;
}
// HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it
// anyway.
Expand Down Expand Up @@ -329,11 +357,18 @@ impl FileChecker for Words {
let msg = report::BinaryFile { path };
reporter.report(msg.into())?;
} else {
let mut ignores: Option<Ignores> = None;
for word in policy
.tokenizer
.parse_bytes(&buffer)
.flat_map(|i| i.split())
{
if ignores
.get_or_insert_with(|| Ignores::new(&buffer, policy.ignore))
.is_ignored(word.span())
{
continue;
}
// HACK: Don't look up the line_num per entry to better match the performance
// of Typos for comparison purposes. We don't really get much out of it
// anyway.
Expand Down Expand Up @@ -644,6 +679,33 @@ fn walk_entry(
Ok(())
}

/// Byte ranges of a buffer that matched one of the configured ignore patterns.
#[derive(Clone, Debug)]
struct Ignores {
/// One range per regex match found in the content, in the order the matches were collected.
blocks: Vec<std::ops::Range<usize>>,
}

impl Ignores {
    /// Collects the range of every match of every pattern in `ignores` within `content`.
    ///
    /// Ranges are gathered pattern by pattern, each in match order. If `content` is not
    /// valid UTF-8, no pattern is applied and the resulting set is empty.
    fn new(content: &[u8], ignores: &[regex::Regex]) -> Self {
        let blocks: Vec<std::ops::Range<usize>> = match std::str::from_utf8(content) {
            Ok(text) => ignores
                .iter()
                .flat_map(|pattern| pattern.find_iter(text))
                .map(|found| found.range())
                .collect(),
            Err(_) => Vec::new(),
        };
        Self { blocks }
    }

    /// Reports whether `span` starts or ends inside any recorded block.
    ///
    /// Only the first position (`span.start`) and the last position (`span.end - 1`,
    /// saturating at zero) are tested. A span that merely encloses a block without
    /// starting or ending inside it is not considered ignored.
    fn is_ignored(&self, span: std::ops::Range<usize>) -> bool {
        let first = span.start;
        let last = span.end.saturating_sub(1);
        for block in &self.blocks {
            if block.contains(&first) || block.contains(&last) {
                return true;
            }
        }
        false
    }
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
27 changes: 22 additions & 5 deletions crates/typos-cli/src/policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub struct ConfigEngine<'s> {
walk: Intern<crate::config::Walk>,
tokenizer: Intern<typos::tokens::Tokenizer>,
dict: Intern<crate::dict::Override<'s, 's, crate::dict::BuiltIn>>,
ignore: Intern<Vec<regex::Regex>>,
}

impl<'s> ConfigEngine<'s> {
Expand All @@ -54,6 +55,7 @@ impl<'s> ConfigEngine<'s> {
walk: Default::default(),
tokenizer: Default::default(),
dict: Default::default(),
ignore: Default::default(),
}
}

Expand Down Expand Up @@ -88,7 +90,7 @@ impl<'s> ConfigEngine<'s> {
dir.type_matcher.definitions()
}

pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_> {
pub fn policy(&self, path: &std::path::Path) -> Policy<'_, '_, '_> {
debug_assert!(path.is_absolute(), "{} is not absolute", path.display());
let dir = self.get_dir(path).expect("`walk()` should be called first");
let (file_type, file_config) = dir.get_file_config(path);
Expand All @@ -99,6 +101,7 @@ impl<'s> ConfigEngine<'s> {
binary: file_config.binary,
tokenizer: self.get_tokenizer(&file_config),
dict: self.get_dict(&file_config),
ignore: self.get_ignore(&file_config),
}
}

Expand All @@ -114,6 +117,10 @@ impl<'s> ConfigEngine<'s> {
self.dict.get(file.dict)
}

/// Looks up the interned ignore patterns referenced by `file.ignore`.
fn get_ignore(&self, file: &FileConfig) -> &[regex::Regex] {
    let handle = file.ignore;
    self.ignore.get(handle)
}

fn get_dir(&self, path: &std::path::Path) -> Option<&DirConfig> {
for path in path.ancestors() {
if let Some(dir) = self.configs.get(path) {
Expand Down Expand Up @@ -220,7 +227,10 @@ impl<'s> ConfigEngine<'s> {
let check_filename = engine.check_filename();
let check_file = engine.check_file();
let crate::config::EngineConfig {
tokenizer, dict, ..
tokenizer,
dict,
extend_ignore_re,
..
} = engine;
let tokenizer_config =
tokenizer.unwrap_or_else(crate::config::TokenizerConfig::from_defaults);
Expand Down Expand Up @@ -254,12 +264,15 @@ impl<'s> ConfigEngine<'s> {
let dict = self.dict.intern(dict);
let tokenizer = self.tokenizer.intern(tokenizer);

let ignore = self.ignore.intern(extend_ignore_re);

FileConfig {
check_filenames: check_filename,
check_files: check_file,
binary,
tokenizer,
dict,
ignore,
}
}
}
Expand Down Expand Up @@ -328,20 +341,22 @@ struct FileConfig {
check_filenames: bool,
check_files: bool,
binary: bool,
ignore: usize,
}

#[non_exhaustive]
#[derive(derive_setters::Setters)]
pub struct Policy<'t, 'd> {
pub struct Policy<'t, 'd, 'i> {
pub check_filenames: bool,
pub check_files: bool,
pub file_type: Option<&'d str>,
pub binary: bool,
pub tokenizer: &'t typos::tokens::Tokenizer,
pub dict: &'d dyn typos::Dictionary,
pub ignore: &'i [regex::Regex],
}

impl<'t, 'd> Policy<'t, 'd> {
impl<'t, 'd, 'i> Policy<'t, 'd, 'i> {
pub fn new() -> Self {
Default::default()
}
Expand All @@ -350,8 +365,9 @@ impl<'t, 'd> Policy<'t, 'd> {
static DEFAULT_TOKENIZER: once_cell::sync::Lazy<typos::tokens::Tokenizer> =
once_cell::sync::Lazy::new(typos::tokens::Tokenizer::new);
static DEFAULT_DICT: crate::dict::BuiltIn = crate::dict::BuiltIn::new(crate::config::Locale::En);
// Empty pattern list used as the `ignore` value of `Policy::default()`.
static DEFAULT_IGNORE: &[regex::Regex] = &[];

impl<'t, 'd> Default for Policy<'t, 'd> {
impl<'t, 'd, 'i> Default for Policy<'t, 'd, 'i> {
fn default() -> Self {
Self {
check_filenames: true,
Expand All @@ -360,6 +376,7 @@ impl<'t, 'd> Default for Policy<'t, 'd> {
binary: false,
tokenizer: &DEFAULT_TOKENIZER,
dict: &DEFAULT_DICT,
ignore: DEFAULT_IGNORE,
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions crates/typos-cli/tests/cmd/extend-ignore-re.in/_typos.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[files]
extend-exclude = ["_typos.toml"]

[default]
extend-ignore-re = ["`.*`"]

[default.extend-identifiers]
hello = "goodbye"
1 change: 1 addition & 0 deletions crates/typos-cli/tests/cmd/extend-ignore-re.in/file.ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello `hello`
12 changes: 12 additions & 0 deletions crates/typos-cli/tests/cmd/extend-ignore-re.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
bin.name = "typos"
stdin = ""
stdout = """
error: `hello` should be `goodbye`
--> ./file.ignore:1:1
|
1 | hello `hello`
| ^^^^^
|
"""
stderr = ""
status.code = 2
6 changes: 6 additions & 0 deletions crates/typos/src/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ impl<'m> Typo<'m> {
corrections: self.corrections.borrow(),
}
}

/// Half-open range covered by this typo: from `byte_offset` to `byte_offset + typo.len()`.
pub fn span(&self) -> std::ops::Range<usize> {
    let len = self.typo.len();
    self.byte_offset..self.byte_offset + len
}
}

impl<'m> Default for Typo<'m> {
Expand Down
14 changes: 14 additions & 0 deletions crates/typos/src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,13 @@ impl<'t> Identifier<'t> {
self.offset
}

/// Half-open range covered by this identifier: from `offset` to `offset + token.len()`.
#[inline]
pub fn span(&self) -> std::ops::Range<usize> {
    let len = self.token.len();
    self.offset..self.offset + len
}

/// Split into individual Words.
#[inline]
pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
Expand Down Expand Up @@ -702,6 +709,13 @@ impl<'t> Word<'t> {
pub fn offset(&self) -> usize {
self.offset
}

/// Half-open range covered by this word: from `offset` to `offset + token.len()`.
#[inline]
pub fn span(&self) -> std::ops::Range<usize> {
    let len = self.token.len();
    self.offset..self.offset + len
}
}

struct SplitIdent<'s> {
Expand Down
1 change: 1 addition & 0 deletions docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Configuration is read from the following (in precedence order)
| default.check-file | \- | bool | Verifying spelling in files. |
| default.unicode | --unicode | bool | Allow unicode characters in identifiers (and not just ASCII) |
| default.locale | --locale | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. |
| default.extend-ignore-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Custom uncorrectable sections (e.g. markdown code fences, PGP signatures) |
| default.extend-identifiers | \- | table of strings | Corrections for [identifiers](./design.md#identifiers-and-words). When the correction is blank, the identifier is never valid. When the correction is the key, the identifier is always valid. |
| default.extend-ignore-identifiers-re | \- | list of [regexes](https://docs.rs/regex/latest/regex/index.html#syntax) | Pattern-match always-valid identifiers |
| default.extend-words | \- | table of strings | Corrections for [words](./design.md#identifiers-and-words). When the correction is blank, the word is never valid. When the correction is the key, the word is always valid. |
Expand Down

0 comments on commit 5253e55

Please sign in to comment.