diff --git a/README.md b/README.md index 75542fb6..a9777cea 100644 --- a/README.md +++ b/README.md @@ -229,22 +229,13 @@ $ echo 'ghp_oHn0As3cr3T!!' | srgn 'ghp_[[:alnum:]]+' '*' # A GitHub token *!! ``` -However, in the presence of capture groups, the *individual characters comprising a -capture group match* are treated *individually* for processing, allowing a replacement -to be repeated: - -```console -$ echo 'Hide ghp_th15 and ghp_th4t' | srgn '(ghp_[[:alnum:]]+)' '*' -Hide ******** and ******** -``` - Advanced regex features are [supported](https://docs.rs/fancy-regex/0.11.0/fancy_regex/index.html#syntax), for example lookarounds: ```console -$ echo 'ghp_oHn0As3cr3T' | srgn '(?<=ghp_)([[:alnum:]]+)' '*' -ghp_*********** +$ echo 'ghp_oHn0As3cr3T' | srgn '(?<=ghp_)[[:alnum:]]+' '*' +ghp_* ``` Take care in using these safely, as advanced patterns come without certain [safety and @@ -271,6 +262,39 @@ $ echo 'Mood: 🤮🤒🤧🦠 :(' | srgn '\p{Emoji_Presentation}' '😷' Mood: 😷😷😷😷 :( ``` +##### Variables + +Replacements are aware of variables, which are made accessible for use through regex +capture groups. Capture groups can be numbered, or optionally named. The zeroth capture +group corresponds to the entire match. + +```console +$ echo 'Swap It' | srgn '(\w+) (\w+)' '$2 $1' # Regular, numbered +It Swap +$ echo 'Swap It' | srgn '(\w+) (\w+)' '$2 $1$1$1' # Use as many times as you'd like +It SwapSwapSwap +$ echo 'Call +1-206-555-0100!' | srgn 'Call (\+?\d\-\d{3}\-\d{3}\-\d{4}).+' 'The phone number in "$0" is: $1.' # Variable `0` is the entire match +The phone number in "Call +1-206-555-0100!" is: +1-206-555-0100. 
+``` + +A more advanced use case is, for example, code refactoring using named capture groups +(perhaps you can come up with a more useful one...): + +```console +$ echo 'let x = 3;' | srgn 'let (?<var>[a-z]+) = (?<expr>.+);' 'const $var$var = $expr + $expr;' +const xx = 3 + 3; +``` + +As in bash, use curly braces to disambiguate variables from immediately adjacent +content: + +```console +$ echo '12' | srgn '(\d)(\d)' '$2${1}1' +211 +$ echo '12' | srgn '(\d)(\d)' '$2$11' # will fail (`11` is unknown) +$ echo '12' | srgn '(\d)(\d)' '$2${11' # will fail (brace was not closed) +``` + #### Beyond replacement Seeing how the replacement is merely a static string, its usefulness is limited. This is diff --git a/src/actions/deletion/mod.rs b/src/actions/deletion/mod.rs index 22b36f77..6f68d7e7 100644 --- a/src/actions/deletion/mod.rs +++ b/src/actions/deletion/mod.rs @@ -1,6 +1,5 @@ -use log::info; - use super::Action; +use log::info; /// Deletes everything in the input. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] diff --git a/src/actions/lower/mod.rs b/src/actions/lower/mod.rs index 551fab2f..d3abe101 100644 --- a/src/actions/lower/mod.rs +++ b/src/actions/lower/mod.rs @@ -1,6 +1,5 @@ -use log::info; - use super::Action; +use log::info; /// Renders in lowercase. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] diff --git a/src/actions/mod.rs b/src/actions/mod.rs index ba3f9c0b..e204f001 100644 --- a/src/actions/mod.rs +++ b/src/actions/mod.rs @@ -9,12 +9,14 @@ mod symbols; mod titlecase; mod upper; +use crate::scoping::scope::ScopeContext; pub use deletion::Deletion; #[cfg(feature = "german")] pub use german::German; pub use lower::Lower; pub use normalization::Normalization; -pub use replace::{Replacement, ReplacementCreationError}; +pub use replace::{Replacement, ReplacementError}; +use std::{error::Error, fmt}; #[cfg(feature = "symbols")] pub use symbols::{inversion::SymbolsInversion, Symbols}; pub use titlecase::Titlecase; @@ -31,8 +33,41 @@ pub trait Action: Send + Sync { /// This is infallible: it cannot fail in the sense of [`Result`]. It can only /// return incorrect results, which would be bugs (please report). fn act(&self, input: &str) -> String; + + /// Acts taking into account additional context. + /// + /// By default, the context is ignored and [`Action::act`] is called. Implementors + /// which need and know how to handle additional context can overwrite this method. + /// + /// # Errors + /// + /// This is fallible, as the context is dynamically created at runtime and + /// potentially contains bad data. See docs of the [`Err`] variant type. + fn act_with_context(&self, input: &str, context: &ScopeContext) -> Result { + let _ = context; // Mark variable as used + Ok(self.act(input)) + } +} + +/// An error during application of an action. +#[derive(Debug, PartialEq, Eq)] +pub enum ActionError { + /// Produced if [`Replacement`] fails. + ReplacementError(ReplacementError), } +impl fmt::Display for ActionError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::ReplacementError(re) => { + write!(f, "Action failed in replacement: {re}") + } + } + } +} + +impl Error for ActionError {} + /// Any function that can be used as an [`Action`]. 
impl Action for T where @@ -49,4 +84,8 @@ impl Action for Box { fn act(&self, input: &str) -> String { self.as_ref().act(input) } + + fn act_with_context(&self, input: &str, context: &ScopeContext) -> Result { + self.as_ref().act_with_context(input, context) + } } diff --git a/src/actions/normalization/mod.rs b/src/actions/normalization/mod.rs index 6dc1884a..aeac630e 100644 --- a/src/actions/normalization/mod.rs +++ b/src/actions/normalization/mod.rs @@ -13,23 +13,3 @@ impl Action for Normalization { input.nfd().filter(|c| !c.is_mark()).collect() } } - -// #[cfg(test)] -// mod tests { -// use rstest::rstest; - -// use super::*; - -// #[rstest] -// #[case("a dog", "A Dog")] -// #[case("ein überfall", "Ein Überfall")] -// #[case("miXeD caSe", "miXeD caSe")] // Hmmm... behavior of `titlecase` crate -// // -// #[case("a dog's life 🐕", "A Dog's Life 🐕")] -// // -// #[case("a dime a dozen", "A Dime a Dozen")] -// fn test_titlecasing(#[case] input: &str, #[case] expected: &str) { -// let result = Titlecase::default().process(input); -// assert_eq!(result, expected); -// } -// } diff --git a/src/actions/replace/mod.rs b/src/actions/replace/mod.rs index 8a7af883..60690935 100644 --- a/src/actions/replace/mod.rs +++ b/src/actions/replace/mod.rs @@ -1,7 +1,11 @@ -use super::Action; -use log::info; +use super::{Action, ActionError}; +use crate::scoping::scope::ScopeContext; +use log::{debug, info}; use std::{error::Error, fmt}; use unescape::unescape; +use variables::{inject_variables, VariableExpressionError}; + +mod variables; /// Replaces input with a fixed string. /// @@ -47,7 +51,7 @@ use unescape::unescape; pub struct Replacement(String); impl TryFrom for Replacement { - type Error = ReplacementCreationError; + type Error = ReplacementError; /// Creates a new replacement from an owned string. /// @@ -73,48 +77,84 @@ impl TryFrom for Replacement { /// Creation fails due to invalid escape sequences. 
/// /// ``` - /// use srgn::actions::{Replacement, ReplacementCreationError}; + /// use srgn::actions::{Replacement, ReplacementError}; /// /// let replacement = Replacement::try_from(r"Invalid \z Escape".to_owned()); /// assert_eq!( /// replacement, - /// Err(ReplacementCreationError::InvalidEscapeSequences( + /// Err(ReplacementError::InvalidEscapeSequences( /// "Invalid \\z Escape".to_owned() /// )) /// ); /// ``` fn try_from(replacement: String) -> Result { - match unescape(&replacement) { - Some(res) => Ok(Self(res)), - None => Err(ReplacementCreationError::InvalidEscapeSequences( - replacement, - )), - } + let unescaped = + unescape(&replacement).ok_or(ReplacementError::InvalidEscapeSequences(replacement))?; + + Ok(Self(unescaped)) } } /// An error that can occur when creating a replacement. #[derive(Debug, PartialEq, Eq)] -pub enum ReplacementCreationError { +pub enum ReplacementError { /// The replacement contains invalid escape sequences. InvalidEscapeSequences(String), + /// The replacement contains an error in its variable expressions. 
+ VariableError(VariableExpressionError), } -impl fmt::Display for ReplacementCreationError { +impl fmt::Display for ReplacementError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::InvalidEscapeSequences(replacement) => { write!(f, "Contains invalid escape sequences: '{replacement}'") } + Self::VariableError(err) => { + write!(f, "Error in variable expressions: {err}") + } } } } -impl Error for ReplacementCreationError {} +impl Error for ReplacementError {} + +impl From for ReplacementError { + fn from(value: VariableExpressionError) -> Self { + Self::VariableError(value) + } +} impl Action for Replacement { fn act(&self, input: &str) -> String { info!("Substituting '{}' with '{}'", input, self.0); + info!("This substitution is verbatim and does not take into account variables"); self.0.clone() } + + fn act_with_context( + &self, + _input: &str, + context: &ScopeContext, + ) -> Result { + match context { + ScopeContext::CaptureGroups(cgs) => { + debug!("Available capture group variables: {cgs:?}"); + + Ok(inject_variables(self.0.as_str(), cgs)?) + } + } + } +} + +impl From for ActionError { + fn from(value: VariableExpressionError) -> Self { + Self::ReplacementError(value.into()) + } +} + +impl From for ActionError { + fn from(value: ReplacementError) -> Self { + Self::ReplacementError(value) + } } diff --git a/src/actions/replace/variables.rs b/src/actions/replace/variables.rs new file mode 100644 index 00000000..2703c683 --- /dev/null +++ b/src/actions/replace/variables.rs @@ -0,0 +1,380 @@ +use crate::scoping::regex::CaptureGroup; +use log::trace; +use std::{collections::HashMap, error::Error, fmt}; + +type Variables<'a> = HashMap; + +/// In an input like `Hello $var World`, inject all variables. +/// +/// Variables are treated as they occur in regular expressions: they can be [named or +/// numbered](https://docs.rs/regex/latest/regex/struct.Captures.html). 
+#[allow(clippy::too_many_lines)] // :( +pub(super) fn inject_variables( + input: &str, + variables: &Variables, +) -> Result { + let mut state = State::default(); + let mut out = String::with_capacity(input.len()); + let mut to_remove = 0; // Remove this many pushed chars once a var is detected + + for c in input.chars() { + trace!( + "Injecting variables. Current output is: '{}', current state is {:?}", + out.escape_debug(), + state + ); + out.push(c); + + state = match (state, c) { + // Initial state + (State::Noop, '$') => { + to_remove = 1; + State::Start + } + (State::Start, '$') => { + // Ignore previous `$`, and only push one. + assert_eq!(out.pop().expect("was pushed in earlier loop"), '$',); + State::default() + } + (State::Noop, _) => State::default(), + + // Init + (State::Start, '{') => { + to_remove += 1; + State::BracedStart + } + (State::Start, 'a'..='z' | 'A'..='Z' | '_') => State::BuildingNamedVar { + name: String::from(c), + braced: false, + }, + (State::BracedStart, 'a'..='z' | 'A'..='Z' | '_') => State::BuildingNamedVar { + name: String::from(c), + braced: true, + }, + (State::Start, '0'..='9') => State::BuildingNumberedVar { + num: c.to_digit(10).expect("hard-coded digit is valid number") as usize, + braced: false, + }, + (State::BracedStart, '0'..='9') => State::BuildingNumberedVar { + num: c.to_digit(10).expect("hard-coded digit is valid number") as usize, + braced: true, + }, + + // Nothing useful matched, go back. 
This is order-dependent, see also + // https://github.com/rust-lang/rust-clippy/issues/860 + #[allow(clippy::match_same_arms)] + (State::Start | State::BracedStart, _) => State::Noop, + + // Building up + ( + State::BuildingNamedVar { mut name, braced }, + 'a'..='z' | 'A'..='Z' | '_' | '0'..='9', + ) => State::BuildingNamedVar { + name: { + name.push(c); + name + }, + braced, + }, + ( + State::BuildingNumberedVar { + num: magnitude, + braced, + }, + '0'..='9', + ) => State::BuildingNumberedVar { + num: { + magnitude * 10 + + c.to_digit(10).expect("hard-coded digit is valid number") as usize + }, + braced, + }, + + // Building stops + (State::BuildingNamedVar { name, braced: true }, '}') => { + to_remove += 1; + + State::FinishNamedVar(name) + } + (State::BuildingNumberedVar { num, braced: true }, '}') => { + to_remove += 1; + + State::FinishNumberedVar(num) + } + ( + State::BuildingNamedVar { + name, braced: true, .. + }, + _, + ) => return Err(VariableExpressionError::MismatchedBraces(name)), + ( + State::BuildingNumberedVar { + num, braced: true, .. + }, + _, + ) => return Err(VariableExpressionError::MismatchedBraces(num.to_string())), + + (State::FinishNamedVar(name) | State::BuildingNamedVar { name, .. }, _) => { + trace!("Finishing up named variable '{name}'"); + match variables.get(&CaptureGroup::Named(name.clone())) { + Some(repl) => { + let tail = out + .pop() + .expect("chars are pushed unconditionally, one is present"); + out.truncate(out.len() - (to_remove + name.len())); + out.push_str(repl); + out.push(tail); + } + None => return Err(VariableExpressionError::UndefinedVariable(name)), + }; + + match c { + '$' => { + to_remove = 1; + State::Start + } + _ => State::Noop, + } + } + (State::FinishNumberedVar(num) | State::BuildingNumberedVar { num, .. 
}, _) => { + trace!("Finishing up numbered variable '{num}'"); + match variables.get(&CaptureGroup::Numbered(num)) { + Some(repl) => { + let tail = out + .pop() + .expect("chars are pushed unconditionally, one is present"); + out.truncate(out.len() - (to_remove + width(num))); + out.push_str(repl); + out.push(tail); + } + None => { + return Err(VariableExpressionError::UndefinedVariable(num.to_string())) + } + }; + + match c { + '$' => { + to_remove = 1; + State::Start + } + _ => State::Noop, + } + } + } + } + + trace!( + "Finished character iteration, output is '{}', state is {:?}", + out.escape_debug(), + state + ); + + // Flush out any pending state + let last = out.chars().last(); + state = match (&state, last) { + ( + State::FinishNamedVar(name) + | State::BuildingNamedVar { + name, + braced: false, + }, + _, + ) => { + trace!("Finishing up named variable '{name}'"); + match variables.get(&CaptureGroup::Named(name.clone())) { + Some(repl) => { + out.truncate(out.len() - (to_remove + name.len())); + out.push_str(repl); + + state + } + None => return Err(VariableExpressionError::UndefinedVariable(name.clone())), + } + } + (State::FinishNumberedVar(num) | State::BuildingNumberedVar { num, braced: false }, _) => { + trace!("Finishing up numbered variable '{num}'"); + match variables.get(&CaptureGroup::Numbered(*num)) { + Some(repl) => { + out.truncate(out.len() - (to_remove + width(*num))); + out.push_str(repl); + + state + } + None => return Err(VariableExpressionError::UndefinedVariable(num.to_string())), + } + } + ( + State::BuildingNamedVar { + name, braced: true, .. + }, + _, + ) => return Err(VariableExpressionError::MismatchedBraces(name.clone())), + ( + State::BuildingNumberedVar { + num, braced: true, .. 
+ }, + _, + ) => return Err(VariableExpressionError::MismatchedBraces(num.to_string())), + (State::Noop | State::Start | State::BracedStart, _) => state, + }; + + trace!( + "Done injecting variables, final output is '{}', final state is {:?}", + out.escape_debug(), + state + ); + + Ok(out) +} + +/// Gets the width in characters of a number. +fn width(num: usize) -> usize { + if num == 0 { + 1 + } else { + (num.ilog10() + 1) as usize + } +} + +/// State during injection of variables in an expression like `Hello $var World`. +#[derive(Debug, PartialEq, Eq, Default)] +enum State { + #[default] + /// Neutral state. + Noop, + /// The character denoting a variable declaration has been seen. + Start, + /// The detected, potential variable additionally starts with an opening brace. + BracedStart, + /// A named variable is detected and is being built up. + BuildingNamedVar { name: String, braced: bool }, + /// A numbered variable is detected and is being built up. + BuildingNumberedVar { num: usize, braced: bool }, + /// Processing of a named variable is done, finish it up. + FinishNamedVar(String), + /// Processing of a numbered variable is done, finish it up. + FinishNumberedVar(usize), +} + +/// An error in variable expressions. +#[derive(Debug, PartialEq, Eq)] +pub enum VariableExpressionError { + /// A variable expression with mismatched number of braces. + MismatchedBraces(String), + /// A requested variable was not passed. 
+ UndefinedVariable(String), +} + +impl fmt::Display for VariableExpressionError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::MismatchedBraces(var) => { + write!(f, "Mismatched braces for variable: '{var}'") + } + Self::UndefinedVariable(var) => { + write!(f, "Undefined variable, unable to substitute: '{var}'") + } + } + } +} + +impl Error for VariableExpressionError {} + +#[cfg(test)] +mod test { + use super::*; + use rstest::*; + + #[fixture] + fn variables() -> Variables<'static> { + Variables::from([ + (CaptureGroup::Named("var1".to_owned()), "val1"), + (CaptureGroup::Named("VAR_2".to_owned()), "val2"), + (CaptureGroup::Numbered(2), "nval"), + ]) + } + + #[rstest] + // Base cases without variables + #[case("", Ok(""))] + #[case("Regular content", Ok("Regular content"))] + // Escaping works + #[case("I have $$5", Ok("I have $5"))] + // + // Basic named variable + #[case("$var1", Ok("val1"))] + #[case("$var1 ", Ok("val1 "))] + #[case(" $var1", Ok(" val1"))] + #[case(" $var1 ", Ok(" val1 "))] + // + // Basic named variables + #[case("$var1 $VAR_2", Ok("val1 val2"))] + #[case("$var1$VAR_2", Ok("val1val2"))] + #[case(" $var1 $VAR_2", Ok(" val1 val2"))] + #[case("$var1 $VAR_2 ", Ok("val1 val2 "))] + #[case(" $var1 $VAR_2 ", Ok(" val1 val2 "))] + // + // Basic numbered variables + #[case("$2", Ok("nval"))] + #[case("$2 ", Ok("nval "))] + #[case(" $2", Ok(" nval"))] + #[case(" $2 ", Ok(" nval "))] + // + // Mixed content + #[case("Hello $2 World $var1", Ok("Hello nval World val1"))] + // + // Braces for separation + #[case("${var1}", Ok("val1"))] + #[case("X${var1}X", Ok("Xval1X"))] + #[case("${2}", Ok("nval"))] + #[case("3${2}3", Ok("3nval3"))] + #[case("Hello${2}2U Sir${var1}Mister", Ok("Hellonval2U Sirval1Mister"))] + // + // Variable multiple times + #[case("$var1$var1", Ok("val1val1"))] + #[case("${var1}${var1}", Ok("val1val1"))] + #[case("${var1}$var1", Ok("val1val1"))] + #[case("${2}$2", Ok("nvalnval"))] + 
#[case("${var1}$var1 ${2}$2", Ok("val1val1 nvalnval"))] + // + // Undefined variables + #[case("$NO", Err(VariableExpressionError::UndefinedVariable("NO".to_owned())))] + #[case("$NO such thing", Err(VariableExpressionError::UndefinedVariable("NO".to_owned())))] + #[case("$NO$ON", Err(VariableExpressionError::UndefinedVariable("NO".to_owned())))] + // Numbers will be stringified + #[case("$1337", Err(VariableExpressionError::UndefinedVariable("1337".to_owned())))] + #[case("$1337 is missing", Err(VariableExpressionError::UndefinedVariable("1337".to_owned())))] + #[case("$1337$7331", Err(VariableExpressionError::UndefinedVariable("1337".to_owned())))] + // + // Improperly closed braces + #[case("${var1", Err(VariableExpressionError::MismatchedBraces("var1".to_owned())))] + #[case("${var1 woops", Err(VariableExpressionError::MismatchedBraces("var1".to_owned())))] + // Excess trailing ones are fine tho + #[case("${var1}}", Ok("val1}"))] + // + // Remaining edge cases + // Aborting a (brace) start + #[case("$?", Ok("$?"))] + #[case("${?", Ok("${?"))] + fn test_inject_variables( + #[case] expression: &str, + #[case] expected: Result<&str, VariableExpressionError>, + variables: Variables, + ) { + let result = inject_variables(expression, &variables); + let expected = expected.map(str::to_owned); + + assert_eq!(result, expected); + } + + #[rstest] + #[case(0, 1)] + #[case(1, 1)] + #[case(9, 1)] + #[case(10, 2)] + #[case(99, 2)] + #[case(100, 3)] + fn test_width(#[case] num: usize, #[case] expected: usize) { + assert_eq!(width(num), expected); + } +} diff --git a/src/actions/symbols/inversion.rs b/src/actions/symbols/inversion.rs index d06695ea..4abecf4d 100644 --- a/src/actions/symbols/inversion.rs +++ b/src/actions/symbols/inversion.rs @@ -1,8 +1,7 @@ -use crate::actions::Action; - use super::Symbol; #[cfg(doc)] use super::Symbols; +use crate::actions::Action; /// Inverts all symbols inserted by [`Symbols`]. 
/// diff --git a/src/lib.rs b/src/lib.rs index 77d7046e..11392916 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,8 @@ //! //! For the library, much like for the binary, there are two main concepts: actions and //! scoping. The latter are manifested in [`ScopedView`]s. Over these, one can -//! [map][`ScopedView::map`] actions. Actions are all types implementing [`Action`]. +//! [map][`ScopedView::map_without_context`] actions. Actions are all types implementing +//! [`Action`]. //! //! # Examples //! @@ -133,7 +134,7 @@ //! //! let mut view = ScopedViewBuilder::new(input).build(); //! let action = German::new(true, false); // Excuse the bool ugliness. -//! view.map(&action); +//! view.map_without_context(&action); //! //! assert_eq!(view.to_string(), "Der Überflieger-Käfer! 🛩️"); //! # } diff --git a/src/main.rs b/src/main.rs index de881079..3fcdf041 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,4 @@ -use anyhow::Context; -use anyhow::Result; +use anyhow::{Context, Result}; use log::{debug, error, info, warn, LevelFilter}; use rayon::prelude::*; use srgn::actions::Deletion; @@ -196,7 +195,7 @@ fn apply( } for action in actions { - view.map(action); + view.map_with_context(action)?; } view.to_string() diff --git a/src/scoping/dosfix.rs b/src/scoping/dosfix.rs index 51dd88c0..cec5e10c 100644 --- a/src/scoping/dosfix.rs +++ b/src/scoping/dosfix.rs @@ -52,8 +52,7 @@ impl Scoper for DosFix { #[cfg(test)] mod tests { - use rstest::rstest; - + use super::*; use crate::scoping::{ scope::{ RWScope, RWScopes, @@ -61,17 +60,16 @@ mod tests { }, view::ScopedView, }; + use rstest::rstest; use std::borrow::Cow::Borrowed; - use super::*; - #[rstest] - #[case("a", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a")))])))] - #[case("a\n", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a\n")))])))] + #[case("a", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a"), None))])))] + #[case("a\n", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a\n"), None))])))] 
// #[case("\r", ScopedView::new(RWScopes(vec![RWScope(Out("\r"))])))] - #[case("a\r", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a"))), RWScope(Out("\r"))])))] - #[case("a\r\n", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a"))), RWScope(Out("\r")), RWScope(In(Borrowed("\n")))])))] + #[case("a\r", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a"), None)), RWScope(Out("\r"))])))] + #[case("a\r\n", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a"), None)), RWScope(Out("\r")), RWScope(In(Borrowed("\n"), None))])))] fn test_dos_fix(#[case] input: &str, #[case] expected: ScopedView) { let mut builder = crate::scoping::view::ScopedViewBuilder::new(input); let dosfix = DosFix; diff --git a/src/scoping/langs/csharp.rs b/src/scoping/langs/csharp.rs index f8394db8..e58c737a 100644 --- a/src/scoping/langs/csharp.rs +++ b/src/scoping/langs/csharp.rs @@ -73,7 +73,10 @@ impl From for TSQuery { impl Scoper for CSharp { fn scope<'viewee>(&self, input: &'viewee str) -> ROScopes<'viewee> { - ROScopes::from_raw_ranges(input, Self::scope_via_query(&mut self.query(), input)) + ROScopes::from_raw_ranges( + input, + Self::scope_via_query(&mut self.query(), input).into(), + ) } } diff --git a/src/scoping/langs/go.rs b/src/scoping/langs/go.rs index 82a116e3..28c388f8 100644 --- a/src/scoping/langs/go.rs +++ b/src/scoping/langs/go.rs @@ -79,7 +79,10 @@ impl From for TSQuery { impl Scoper for Go { fn scope<'viewee>(&self, input: &'viewee str) -> ROScopes<'viewee> { - ROScopes::from_raw_ranges(input, Self::scope_via_query(&mut self.query(), input)) + ROScopes::from_raw_ranges( + input, + Self::scope_via_query(&mut self.query(), input).into(), + ) } } diff --git a/src/scoping/langs/python.rs b/src/scoping/langs/python.rs index d68a85f2..8291f157 100644 --- a/src/scoping/langs/python.rs +++ b/src/scoping/langs/python.rs @@ -120,7 +120,10 @@ impl From for TSQuery { impl Scoper for Python { fn scope<'viewee>(&self, input: &'viewee str) -> ROScopes<'viewee> { - 
ROScopes::from_raw_ranges(input, Self::scope_via_query(&mut self.query(), input)) + ROScopes::from_raw_ranges( + input, + Self::scope_via_query(&mut self.query(), input).into(), + ) } } diff --git a/src/scoping/langs/rust.rs b/src/scoping/langs/rust.rs index 80973623..e4ccda3e 100644 --- a/src/scoping/langs/rust.rs +++ b/src/scoping/langs/rust.rs @@ -102,7 +102,10 @@ impl From for TSQuery { impl Scoper for Rust { fn scope<'viewee>(&self, input: &'viewee str) -> ROScopes<'viewee> { - ROScopes::from_raw_ranges(input, Self::scope_via_query(&mut self.query(), input)) + ROScopes::from_raw_ranges( + input, + Self::scope_via_query(&mut self.query(), input).into(), + ) } } diff --git a/src/scoping/langs/typescript.rs b/src/scoping/langs/typescript.rs index fd4b6fac..6368ac11 100644 --- a/src/scoping/langs/typescript.rs +++ b/src/scoping/langs/typescript.rs @@ -71,9 +71,10 @@ impl From for TSQuery { impl Scoper for TypeScript { fn scope<'viewee>(&self, input: &'viewee str) -> ROScopes<'viewee> { - let ranges = Self::scope_via_query(&mut self.query(), input); - - ROScopes::from_raw_ranges(input, ranges) + ROScopes::from_raw_ranges( + input, + Self::scope_via_query(&mut self.query(), input).into(), + ) } } diff --git a/src/scoping/literal.rs b/src/scoping/literal.rs index 4a9206a4..1c6c30a9 100644 --- a/src/scoping/literal.rs +++ b/src/scoping/literal.rs @@ -1,4 +1,5 @@ use super::{ROScopes, Scoper}; +use crate::ranges::Ranges; use log::trace; use std::{error::Error, fmt, ops::Range}; use unescape::unescape; @@ -42,7 +43,7 @@ impl Scoper for Literal { let ranges = { let len = self.0.len(); - let ranges = input + let ranges: Ranges = input .match_indices(&self.0) .map(|(i, _)| Range { start: i, @@ -55,14 +56,13 @@ impl Scoper for Literal { ranges }; - ROScopes::from_raw_ranges(input, ranges) + ROScopes::from_raw_ranges(input, ranges.into()) } } #[cfg(test)] mod tests { - use rstest::rstest; - + use super::*; use crate::scoping::{ scope::{ RWScope, RWScopes, @@ -70,20 +70,82 @@ 
mod tests { }, view::ScopedView, }; + use rstest::rstest; use std::borrow::Cow::Borrowed; - use super::*; - #[rstest] - #[case("a", "a", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a")))])))] - #[case("aa", "a", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a"))), RWScope(In(Borrowed("a")))])))] - #[case("aba", "a", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed("a"))), RWScope(Out("b")), RWScope(In(Borrowed("a")))])))] + #[case( + "a", + "a", + ScopedView::new( + RWScopes(vec![ + RWScope(In(Borrowed("a"), None)), + ]) + ) + )] + #[case( + "aa", + "a", + ScopedView::new( + RWScopes(vec![ + RWScope(In(Borrowed("a"), None)), + RWScope(In(Borrowed("a"), None)), + ]) + ) + )] + #[case( + "aba", + "a", + ScopedView::new( + RWScopes(vec![ + RWScope(In(Borrowed("a"), None)), + RWScope(Out("b")), + RWScope(In(Borrowed("a"), None)), + ]) + ) + )] // - #[case(".", ".", ScopedView::new(RWScopes(vec![RWScope(In(Borrowed(".")))])))] - #[case(r"\.", ".", ScopedView::new(RWScopes(vec![RWScope(Out(r"\")), RWScope(In(Borrowed(".")))])))] - #[case(r".", r"\\.", ScopedView::new(RWScopes(vec![RWScope(Out(r"."))])))] + #[case( + ".", + ".", + ScopedView::new( + RWScopes(vec![ + RWScope(In(Borrowed("."), None)), + ]) + ) + )] + #[case( + r"\.", + ".", + ScopedView::new( + RWScopes(vec![ + RWScope(Out(r"\")), + RWScope(In(Borrowed("."), None)), + ]) + ) + )] + #[case( + r".", + r"\\.", + ScopedView::new( + RWScopes(vec![ + RWScope(Out(r".")), + ]) + ) + )] // - #[case("Hello\nWorld\n", "\n", ScopedView::new(RWScopes(vec![RWScope(Out("Hello")), RWScope(In(Borrowed("\n"))), RWScope(Out("World")), RWScope(In(Borrowed("\n")))])))] + #[case( + "Hello\nWorld\n", + "\n", + ScopedView::new( + RWScopes(vec![ + RWScope(Out("Hello")), + RWScope(In(Borrowed("\n"), None)), + RWScope(Out("World")), + RWScope(In(Borrowed("\n"), None)), + ]) + ) + )] fn test_literal_scoping( #[case] input: &str, #[case] literal: &str, diff --git a/src/scoping/regex.rs b/src/scoping/regex.rs index 
ed3a460c..23c8b589 100644 --- a/src/scoping/regex.rs +++ b/src/scoping/regex.rs @@ -1,9 +1,9 @@ +use super::scope::ScopeContext; use super::ROScopes; use super::Scoper; -use crate::ranges::Ranges; use crate::RegexPattern; use crate::GLOBAL_SCOPE; -use log::{debug, trace}; +use std::collections::HashMap; use std::error::Error; use std::fmt; @@ -11,13 +11,46 @@ use std::fmt; #[derive(Debug)] pub struct Regex { pattern: RegexPattern, + captures: Vec, +} + +/// A capture group in a regex, which can be either named (`(?<name>REGEX)`) or numbered +/// (`(REGEX)`). +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum CaptureGroup { + /// A named capture group. + Named(String), + /// A numbered capture group, where 0 stands for the entire match. + Numbered(usize), +} + +impl fmt::Display for CaptureGroup { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let (value, r#type) = match self { + CaptureGroup::Named(name) => (name.clone(), "named"), + CaptureGroup::Numbered(number) => (number.to_string(), "numbered"), + }; + write!(f, "{value} ({type})") + } } impl Regex { /// Create a new regular expression. 
#[must_use] pub fn new(pattern: RegexPattern) -> Self { - Self { pattern } + let capture_names = pattern + .capture_names() + .enumerate() + .map(|(i, name)| match name { + Some(name) => CaptureGroup::Named(name.to_owned()), + None => CaptureGroup::Numbered(i), + }) + .collect(); + + Self { + pattern, + captures: capture_names, + } } } @@ -53,132 +86,339 @@ impl Default for Regex { impl Scoper for Regex { fn scope<'viewee>(&self, input: &'viewee str) -> ROScopes<'viewee> { - let has_capture_groups = self.pattern.captures_len() > 1; - - let ranges = if has_capture_groups { - trace!( - "Pattern '{}' has capture groups, iterating over matches", - self.pattern - ); - let mut ranges = Vec::new(); - for cap in self.pattern.captures_iter(input).flatten() { - let mut it = cap.iter(); - - let overall_match = it - .next() - // https://docs.rs/regex/1.9.5/regex/struct.SubCaptureMatches.html - .expect("Entered iterator of matches, but zeroth (whole) match missing") - .expect("First element guaranteed to be non-None (whole match)"); - trace!( - "Overall match: '{}' from index {} to {}", - overall_match.as_str().escape_debug(), - overall_match.start(), - overall_match.end() - ); - - let subranges = it.flatten().map(|m| m.range()).collect::>(); - trace!("Capture groups: {:?}", subranges); - - // Treat the capture groups specially - subranges - .iter() - .for_each(|subrange| ranges.extend(Ranges::from(subrange))); - - // Parts of the overall match, but not the capture groups: push as-is - ranges.extend(Ranges::from_iter([overall_match.range()]) - subranges); + let mut ranges = HashMap::new(); + for cap in self.pattern.captures_iter(input) { + match cap { + Ok(cap) => { + let capture_context: HashMap = self + .captures + .iter() + .filter_map(|cg| { + match cg { + CaptureGroup::Named(name) => cap.name(name.as_str()), + CaptureGroup::Numbered(number) => cap.get(*number), + } + .map(|r#match| (cg.clone(), r#match.as_str())) + }) + .collect(); + + ranges.insert( + cap.get(0) + 
.expect("index 0 guaranteed to contain whole match") + .range(), + Some(ScopeContext::CaptureGroups(capture_context)), + ); + } + // Let's blow up on purpose instead of silently continuing; any of + // these errors a user will likely want to know about, as they + // indicate serious failure. + Err(fancy_regex::Error::RuntimeError(e)) => { + panic!("regex exceeded runtime limits: {e}") + } + Err(fancy_regex::Error::ParseError(_, _) | fancy_regex::Error::CompileError(_)) => { + unreachable!("pattern was compiled successfully before") + } + Err(fancy_regex::Error::__Nonexhaustive) => { + unreachable!("implementation detail of fancy-regex") + } } + } - let res = ranges.into_iter().collect(); - debug!("Ranges to scope after regex: {:?}", res); - res - } else { - trace!( - "No capture groups in pattern '{}', short-circuiting", - input.escape_debug() - ); - - self.pattern - .find_iter(input) - .flatten() - .map(|m| m.range()) - .collect() - }; - - ROScopes::from_raw_ranges(input, ranges) + return ROScopes::from_raw_ranges(input, ranges); } } #[cfg(test)] mod tests { - use rstest::rstest; - + use super::*; use crate::scoping::{ scope::{ RWScope, RWScopes, Scope::{In, Out}, }, - view::ScopedView, + view::{ScopedView, ScopedViewBuilder}, }; + use rstest::rstest; use std::borrow::Cow::Borrowed as B; - use super::*; + /// Get 'Capture Group 0', the default which is always present. + #[allow(clippy::unnecessary_wraps)] + fn cg0(string: &str) -> Option { + Some(ScopeContext::CaptureGroups(HashMap::from([( + CaptureGroup::Numbered(0), + string, + )]))) + } + + /// Get naively numbered capture groups. 
+ #[allow(clippy::unnecessary_wraps)] + fn cgs<'a>(strings: &[&'a str]) -> Option<ScopeContext<'a>> { + let mut cgs = HashMap::new(); + + for (i, string) in strings.iter().enumerate() { + cgs.insert(CaptureGroup::Numbered(i), *string); + } + + Some(ScopeContext::CaptureGroups(cgs)) + } #[rstest] - #[case("a", "a", ScopedView::new(RWScopes(vec![RWScope(In(B("a")))])))] - #[case("aa", "a", ScopedView::new(RWScopes(vec![RWScope(In(B("a"))), RWScope(In(B("a")))])))] - #[case("aba", "a", ScopedView::new(RWScopes(vec![RWScope(In(B("a"))), RWScope(Out("b")), RWScope(In(B("a")))])))] + #[case( + "a", + "a", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + ]) + ) + )] + #[case( + "aa", + "a", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + RWScope(In(B("a"), cg0("a"))), + ]) + ) + )] + #[case( + "aba", + "a", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + RWScope(Out("b")), + RWScope(In(B("a"), cg0("a"))), + ]) + ) + )] // - #[case("a", "", ScopedView::new(RWScopes(vec![RWScope(Out("a"))])))] - #[case("", "a", ScopedView::new(RWScopes(vec![])))] // Empty results are discarded + #[case( + "a", + "", + ScopedView::new(RWScopes( + vec![ + RWScope(Out("a")), + ]) + ) + )] + #[case( + "", + "a", + ScopedView::new(RWScopes( + // Empty results are discarded + vec![ + ]) + ) + )] // - #[case("a", "a", ScopedView::new(RWScopes(vec![RWScope(In(B("a")))])))] - #[case("a", "b", ScopedView::new(RWScopes(vec![RWScope(Out("a"))])))] + #[case( + "a", + "a", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + ]) + ) + )] + #[case( + "a", + "b", + ScopedView::new(RWScopes( + vec![ + RWScope(Out("a")), + ]) + ) + )] // - #[case("a", ".*", ScopedView::new(RWScopes(vec![RWScope(In(B("a")))])))] - #[case("a", ".+?", ScopedView::new(RWScopes(vec![RWScope(In(B("a")))])))] + #[case( + "a", + ".*", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + ]) + ) + )] + #[case( + "a", + ".+?", +
ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + ]) + ) + )] // - #[case("a\na", ".*", ScopedView::new(RWScopes(vec![RWScope(In(B("a"))), RWScope(Out("\n")), RWScope(In(B("a")))])))] - #[case("a\na", "(?s).*", ScopedView::new(RWScopes(vec![RWScope(In(B("a\na")))])))] // Dot matches newline + #[case( + "a\na", + ".*", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + RWScope(Out("\n")), + RWScope(In(B("a"), cg0("a"))), + ]) + ) + )] + #[case( + "a\na", + "(?s).*", + ScopedView::new(RWScopes( + vec![ + // Dot matches newline + RWScope(In(B("a\na"), cg0("a\na"))), + ]) + ) + )] // - #[case("abc", "a", ScopedView::new(RWScopes(vec![RWScope(In(B("a"))), RWScope(Out("bc"))])))] + #[case( + "abc", + "a", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + RWScope(Out("bc")), + ]) + ) + )] // - #[case("abc", r"\w", ScopedView::new(RWScopes(vec![RWScope(In(B("a"))), RWScope(In(B("b"))), RWScope(In(B("c")))])))] - #[case("abc", r"\W", ScopedView::new(RWScopes(vec![RWScope(Out("abc"))])))] - #[case("abc", r"\w+", ScopedView::new(RWScopes(vec![RWScope(In(B("abc")))])))] + #[case( + "abc", + r"\w", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("a"), cg0("a"))), + RWScope(In(B("b"), cg0("b"))), + RWScope(In(B("c"), cg0("c"))), + ]) + ) + )] + #[case( + "abc", + r"\W", + ScopedView::new(RWScopes( + vec![ + RWScope(Out("abc")), + ]) + ) + )] + #[case( + "abc", + r"\w+", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("abc"), cg0("abc"))), + ]) + ) + )] // - #[case("Work 69 on 420 words", r"\w+", ScopedView::new(RWScopes(vec![RWScope(In(B("Work"))), RWScope(Out(" ")), RWScope(In(B("69"))), RWScope(Out(" ")), RWScope(In(B("on"))), RWScope(Out(" ")), RWScope(In(B("420"))), RWScope(Out(" ")), RWScope(In(B("words")))])))] - #[case("Ignore 69 the 420 digits", r"\p{letter}+", ScopedView::new(RWScopes(vec![RWScope(In(B("Ignore"))), RWScope(Out(" 69 ")), RWScope(In(B("the"))), RWScope(Out(" 420 ")), 
RWScope(In(B("digits")))])))] + #[case( + "Work 69 on 420 words", + r"\w+", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("Work"), cg0("Work"))), + RWScope(Out(" ")), + RWScope(In(B("69"), cg0("69"))), + RWScope(Out(" ")), + RWScope(In(B("on"), cg0("on"))), + RWScope(Out(" ")), + RWScope(In(B("420"), cg0("420"))), + RWScope(Out(" ")), + RWScope(In(B("words"), cg0("words"))), + ]) + ) + )] + #[case( + "Ignore 69 the 420 digits", + r"\p{letter}+", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("Ignore"), cg0("Ignore"))), + RWScope(Out(" 69 ")), + RWScope(In(B("the"), cg0("the"))), + RWScope(Out(" 420 ")), + RWScope(In(B("digits"), cg0("digits"))), + ]) + ) + )] // - #[case(".", ".", ScopedView::new(RWScopes(vec![RWScope(In(B(".")))])))] - #[case(r"\.", ".", ScopedView::new(RWScopes(vec![RWScope(In(B(r"\"))), RWScope(In(B(".")))])))] - #[case(r".", r"\.", ScopedView::new(RWScopes(vec![RWScope(In(B(r".")))])))] - #[case(r"\.", r"\.", ScopedView::new(RWScopes(vec![RWScope(Out(r"\")), RWScope(In(B(r".")))])))] - #[case(r"\w", r"\w", ScopedView::new(RWScopes(vec![RWScope(Out(r"\")), RWScope(In(B(r"w")))])))] + #[case( + ".", + ".", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("."), cg0("."))), + ]) + ) + )] + #[case( + r"\.", + ".", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B(r"\"), cg0(r"\"))), + RWScope(In(B("."), cg0("."))), + ]) + ) + )] + #[case( + r".", + r"\.", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B("."), cg0("."))), + ]) + ) + )] + #[case( + r"\.", + r"\.", + ScopedView::new(RWScopes( + vec![ + RWScope(Out(r"\")), + RWScope(In(B("."), cg0("."))), + ]) + ) + )] + #[case( + r"\w", + r"\w", + ScopedView::new(RWScopes( + vec![ + RWScope(Out(r"\")), + RWScope(In(B("w"), cg0("w"))), + ]) + ) + )] // // Capture groups - #[case(r"Hello", r"\w+", ScopedView::new(RWScopes(vec![RWScope(In(B(r"Hello")))])))] #[case( - r"Hello", r"(\w+)", + r"Hello", + r"\w+", + ScopedView::new(RWScopes( + vec![ + RWScope(In(B(r"Hello"), cg0("Hello"))), + ]) + 
) + )] + #[case( + r"Hello", + r"(\w+)", ScopedView::new(RWScopes( vec![ - RWScope(In(B(r"H"))), - RWScope(In(B(r"e"))), - RWScope(In(B(r"l"))), - RWScope(In(B(r"l"))), - RWScope(In(B(r"o"))) + RWScope(In(B(r"Hello"), cgs(&["Hello", "Hello"]))), ] )) )] #[case( - r"Hello World", r"Hello (\w+)", + r"Hello World", + r"Hello (\w+)", ScopedView::new(RWScopes( vec![ - RWScope(In(B(r"Hello "))), - RWScope(In(B(r"W"))), - RWScope(In(B(r"o"))), - RWScope(In(B(r"r"))), - RWScope(In(B(r"l"))), - RWScope(In(B(r"d"))) + RWScope(In(B(r"Hello World"), cgs(&["Hello World", "World"]))), ] )) )] @@ -188,35 +428,25 @@ mod tests { r"(?P<msg>.+);(?P<structure>.+)", ScopedView::new(RWScopes( vec![ - RWScope(In(B(r#"""#))), - RWScope(In(B("e"))), - RWScope(In(B("r"))), - RWScope(In(B("r"))), - RWScope(In(B("o"))), - RWScope(In(B("r"))), - RWScope(In(B(r#"""#))), - RWScope(In(B(";"))), - RWScope(In(B(" "))), - RWScope(In(B(r#"""#))), - RWScope(In(B("x"))), - RWScope(In(B(r#"""#))), - RWScope(In(B(" "))), - RWScope(In(B("="))), - RWScope(In(B(">"))), - RWScope(In(B(" "))), - RWScope(In(B("%"))), - RWScope(In(B("x"))), - RWScope(In(B(","))), - RWScope(In(B(" "))), - RWScope(In(B(r#"""#))), - RWScope(In(B("y"))), - RWScope(In(B(r#"""#))), - RWScope(In(B(" "))), - RWScope(In(B("="))), - RWScope(In(B(">"))), - RWScope(In(B(" "))), - RWScope(In(B("%"))), - RWScope(In(B("y"))), + RWScope( + In(B(r#""error"; "x" => %x, "y" => %y"#), + Some( + ScopeContext::CaptureGroups(HashMap::from([ + ( + CaptureGroup::Numbered(0), + r#""error"; "x" => %x, "y" => %y"#, + ), + ( + CaptureGroup::Named("msg".into()), + r#""error""#, + ), + ( + CaptureGroup::Named("structure".into()), + r#" "x" => %x, "y" => %y"#, + ), + ])) + )) + ), ] )) )] @@ -225,7 +455,7 @@ mod tests { #[case] pattern: &str, #[case] expected: ScopedView, ) { - let mut builder = crate::scoping::view::ScopedViewBuilder::new(input); + let mut builder = ScopedViewBuilder::new(input); let regex = Regex::new(RegexPattern::new(pattern).unwrap());
builder.explode(&regex); let actual = builder.build(); @@ -331,7 +561,7 @@ mod tests { let scopes = scope.scope(&input); if scopes.0.iter().any(|s| match s { - ROScope(In(_)) => true, + ROScope(In(_, _)) => true, ROScope(Out(_)) => false, }) { n_matches += 1; diff --git a/src/scoping/scope.rs b/src/scoping/scope.rs index e88c8517..01526d80 100644 --- a/src/scoping/scope.rs +++ b/src/scoping/scope.rs @@ -1,15 +1,17 @@ +use super::regex::CaptureGroup; use crate::{ ranges::Ranges, scoping::scope::Scope::{In, Out}, }; +use itertools::Itertools; use log::{debug, trace}; -use std::{borrow::Cow, ops::Range}; +use std::{borrow::Cow, collections::HashMap, ops::Range}; /// Indicates whether a given string part is in scope. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Scope<'viewee, T> { /// The given part is in scope for processing. - In(T), + In(T, Option<ScopeContext<'viewee>>), /// The given part is out of scope for processing. /// /// Treated as immutable, view-only. @@ -41,6 +43,21 @@ impl<'viewee> ROScope<'viewee> { } } +/// Raw ranges, paired with optional context for content at that range. +pub type RangesWithContext<'viewee> = HashMap<Range<usize>, Option<ScopeContext<'viewee>>>; + +/// Converts, leaving unknown values [`Default`]. +/// +/// A convenience to support [`Ranges`] where there's no meaningful context to be +/// inserted for [`RangesWithContext`]. +impl<'viewee> From<Ranges<usize>> for RangesWithContext<'viewee> { + fn from(val: Ranges<usize>) -> Self { + val.into_iter() + .map(|range| (range, Option::default())) + .collect() + } +} + impl<'viewee> ROScopes<'viewee> { /// Construct a new instance from the given raw ranges. /// @@ -52,15 +69,15 @@ impl<'viewee> ROScopes<'viewee> { /// /// Panics if the given `ranges` contain indices out-of-bounds for `input`.
#[must_use] - pub fn from_raw_ranges(input: &'viewee str, ranges: Ranges<usize>) -> Self { + pub fn from_raw_ranges(input: &'viewee str, ranges: RangesWithContext<'viewee>) -> Self { trace!("Constructing scopes from raw ranges: {:?}", ranges); let mut scopes = Vec::with_capacity(ranges.len()); let mut last_end = 0; - for Range { start, end } in ranges { + for (Range { start, end }, context) in ranges.into_iter().sorted_by_key(|(r, _)| r.start) { scopes.push(ROScope(Out(&input[last_end..start]))); - scopes.push(ROScope(In(&input[start..end]))); + scopes.push(ROScope(In(&input[start..end], context))); last_end = end; } @@ -83,8 +100,8 @@ impl<'viewee> ROScopes<'viewee> { .0 .into_iter() .map(|s| match s { - ROScope(In(s)) => ROScope(Out(s)), - ROScope(Out(s)) => ROScope(In(s)), + ROScope(In(s, _)) => ROScope(Out(s)), + ROScope(Out(s)) => ROScope(In(s, None)), }) .collect(); trace!("Inverted scopes: {:?}", scopes); @@ -134,7 +151,7 @@ impl<'viewee> From<&'viewee ROScope<'viewee>> for &'viewee str { /// All variants contain such a slice, so this is a convenient method. fn from(s: &'viewee ROScope) -> Self { match s.0 { - In(s) | Out(s) => s, + In(s, _) | Out(s) => s, } } } @@ -142,7 +159,7 @@ impl<'viewee> From<&'viewee ROScope<'viewee>> for &'viewee str { impl<'viewee> From<ROScope<'viewee>> for RWScope<'viewee> { fn from(s: ROScope<'viewee>) -> Self { match s.0 { - In(s) => RWScope(In(Cow::Borrowed(s))), + In(s, names) => RWScope(In(Cow::Borrowed(s), names)), Out(s) => RWScope(Out(s)), } } @@ -154,12 +171,22 @@ impl<'viewee> From<&'viewee RWScope<'viewee>> for &'viewee str { /// All variants contain such a slice, so this is a convenient method. fn from(s: &'viewee RWScope) -> Self { match &s.0 { - In(s) => s, + In(s, _) => s, Out(s) => s, } } } +/// Context accompanying a scope. +/// +/// For example, a scope might have been created by a regular expression, in which case +/// capture groups might have matched.
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ScopeContext<'viewee> { + /// Regular expression capture groups mapped to the content they matched. + CaptureGroups(HashMap<CaptureGroup, &'viewee str>), +} + #[cfg(test)] mod tests { use super::*; @@ -167,37 +194,37 @@ mod tests { #[rstest] // Base cases - #[case(ROScopes(vec![ROScope(In("abc"))]), "abc", true)] - #[case(ROScopes(vec![ROScope(In("cba"))]), "cba", true)] - #[case(ROScopes(vec![ROScope(In("🦀"))]), "🦀", true)] - #[case(ROScopes(vec![ROScope(In("🦀"))]), "🤗", false)] + #[case(ROScopes(vec![ROScope(In("abc", None))]), "abc", true)] + #[case(ROScopes(vec![ROScope(In("cba", None))]), "cba", true)] + #[case(ROScopes(vec![ROScope(In("🦀", None))]), "🦀", true)] + #[case(ROScopes(vec![ROScope(In("🦀", None))]), "🤗", false)] // // Substring matching - #[case(ROScopes(vec![ROScope(In("a")), ROScope(In("b"))]), "ab", true)] - #[case(ROScopes(vec![ROScope(In("a")), ROScope(In("b")), ROScope(In("c"))]), "abc", true)] + #[case(ROScopes(vec![ROScope(In("a", None)), ROScope(In("b", None))]), "ab", true)] + #[case(ROScopes(vec![ROScope(In("a", None)), ROScope(In("b", None)), ROScope(In("c", None))]), "abc", true)] // - #[case(ROScopes(vec![ROScope(In("a")), ROScope(In("b"))]), "ac", false)] - #[case(ROScopes(vec![ROScope(In("a")), ROScope(In("b"))]), "a", false)] - #[case(ROScopes(vec![ROScope(In("a")), ROScope(In("b"))]), "b", false)] - #[case(ROScopes(vec![ROScope(In("a")), ROScope(In("b")), ROScope(In("c"))]), "acc", false)] + #[case(ROScopes(vec![ROScope(In("a", None)), ROScope(In("b", None))]), "ac", false)] + #[case(ROScopes(vec![ROScope(In("a", None)), ROScope(In("b", None))]), "a", false)] + #[case(ROScopes(vec![ROScope(In("a", None)), ROScope(In("b", None))]), "b", false)] + #[case(ROScopes(vec![ROScope(In("a", None)), ROScope(In("b", None)), ROScope(In("c", None))]), "acc", false)] // // Length mismatch - #[case(ROScopes(vec![ROScope(In("abc"))]), "abcd", false)] - #[case(ROScopes(vec![ROScope(In("abcd"))]), "abc", false)] +
#[case(ROScopes(vec![ROScope(In("abc", None))]), "abcd", false)] + #[case(ROScopes(vec![ROScope(In("abcd", None))]), "abc", false)] // // Partial emptiness - #[case(ROScopes(vec![ROScope(In("abc"))]), "", false)] - #[case(ROScopes(vec![ROScope(In(""))]), "abc", false)] + #[case(ROScopes(vec![ROScope(In("abc", None))]), "", false)] + #[case(ROScopes(vec![ROScope(In("", None))]), "abc", false)] #[case(ROScopes(vec![ROScope(Out(""))]), "abc", false)] - #[case(ROScopes(vec![ROScope(In("")), ROScope(Out(""))]), "abc", false)] + #[case(ROScopes(vec![ROScope(In("", None)), ROScope(Out(""))]), "abc", false)] // // Full emptiness - #[case(ROScopes(vec![ROScope(In(""))]), "", true)] + #[case(ROScopes(vec![ROScope(In("", None))]), "", true)] #[case(ROScopes(vec![ROScope(Out(""))]), "", true)] - #[case(ROScopes(vec![ROScope(In("")), ROScope(Out(""))]), "", true)] + #[case(ROScopes(vec![ROScope(In("", None)), ROScope(Out(""))]), "", true)] // // Types of scope doesn't matter - #[case(ROScopes(vec![ROScope(In("a"))]), "a", true)] + #[case(ROScopes(vec![ROScope(In("a", None))]), "a", true)] #[case(ROScopes(vec![ROScope(Out("a"))]), "a", true)] fn test_scoped_view_str_equality( #[case] scopes: ROScopes<'_>, diff --git a/src/scoping/view.rs b/src/scoping/view.rs index 4076dfa4..865080ae 100644 --- a/src/scoping/view.rs +++ b/src/scoping/view.rs @@ -1,5 +1,7 @@ -use crate::actions::{self, Action, ReplacementCreationError}; +use crate::actions::{self, Action, ActionError}; use crate::scoping::dosfix::DosFix; +#[cfg(doc)] +use crate::scoping::scope::ScopeContext; use crate::scoping::scope::{ ROScope, ROScopes, RWScope, RWScopes, Scope::{In, Out}, @@ -12,8 +14,8 @@ use std::fmt; /// A view of some input, sorted into parts, which are either [`In`] or [`Out`] of scope /// for processing. /// -/// The view is **writable**. It can be manipulated by [mapping][`Self::map`] -/// [`Action`]s over it. +/// The view is **writable**. 
It can be manipulated by +/// [mapping][`Self::map_without_context`] [`Action`]s over it. /// /// The main avenue for constructing a view is [`Self::builder`]. #[derive(Debug, Clone, PartialEq, Eq)] @@ -40,19 +42,48 @@ impl<'viewee> ScopedView<'viewee> { /// Apply an `action` to all [`In`] scope items contained in this view. /// /// They are **replaced** with whatever the action returns for the particular scope. + /// This method is infallible, as it does not access any [`ScopeContext`]. /// /// See implementors of [`Action`] for available types. - pub fn map(&mut self, action: &impl Action) -> &mut Self { + #[allow(clippy::missing_panics_doc)] // 🤞 + pub fn map_without_context(&mut self, action: &impl Action) -> &mut Self { + self.map_impl(action, false) + .expect("not accessing context, so is infallible"); + + self + } + + /// Same as [`Self::map_without_context`], but will access any [`ScopeContext`], + /// which is fallible. + /// + /// # Errors + /// + /// See the concrete type of the [`Err`] variant for when this method errors. 
+ pub fn map_with_context(&mut self, action: &impl Action) -> Result<&mut Self, ActionError> { + self.map_impl(action, true)?; + + Ok(self) + } + + fn map_impl( + &mut self, + action: &impl Action, + use_context: bool, + ) -> Result<&mut Self, ActionError> { for scope in &mut self.scopes.0 { match scope { - RWScope(In(s)) => { - let res = action.act(s); + RWScope(In(s, context)) => { + debug!("Mapping with context: {:?}", context); + let res = match (&context, use_context) { + (Some(c), true) => action.act_with_context(s, c)?, + _ => action.act(s), + }; debug!( "Replacing '{}' with '{}'", s.escape_debug(), res.escape_debug() ); - *scope = RWScope(In(Cow::Owned(res))); + *scope = RWScope(In(Cow::Owned(res), context.clone())); } RWScope(Out(s)) => { debug!("Appending '{}'", s.escape_debug()); @@ -60,7 +91,7 @@ impl<'viewee> ScopedView<'viewee> { } } - self + Ok(self) } /// Squeeze all consecutive [`In`] scopes into a single occurrence (the first one). @@ -69,8 +100,8 @@ impl<'viewee> ScopedView<'viewee> { let mut prev_was_in = false; self.scopes.0.retain(|scope| { - let keep = !(prev_was_in && matches!(scope, RWScope(In(_)))); - prev_was_in = matches!(scope, RWScope(In(_))); + let keep = !(prev_was_in && matches!(scope, RWScope(In(_, _)))); + prev_was_in = matches!(scope, RWScope(In(_, _))); trace!("keep: {}, scope: {:?}", keep, scope); keep }); @@ -84,7 +115,7 @@ impl<'viewee> ScopedView<'viewee> { #[must_use] pub fn has_any_in_scope(&self) -> bool { self.scopes.0.iter().any(|s| match s { - RWScope(In(_)) => true, + RWScope(In(_, _)) => true, RWScope(Out(_)) => false, }) } @@ -94,76 +125,84 @@ impl<'viewee> ScopedView<'viewee> { /// /// Where actions don't take arguments, neither do the methods. impl<'viewee> ScopedView<'viewee> { - /// Apply the default [`actions::Deletion`] action to this view (see [`Self::map`]). + /// Apply the default [`actions::Deletion`] action to this view (see + /// [`Self::map_without_context`]). 
pub fn delete(&mut self) -> &mut Self { let action = actions::Deletion::default(); - self.map(&action) + self.map_without_context(&action) } - /// Apply the default [`actions::German`] action to this view (see [`Self::map`]). + /// Apply the default [`actions::German`] action to this view (see + /// [`Self::map_without_context`]). #[cfg(feature = "german")] pub fn german(&mut self) -> &mut Self { let action = actions::German::default(); - self.map(&action) + self.map_without_context(&action) } - /// Apply the default [`actions::Lower`] action to this view (see [`Self::map`]). + /// Apply the default [`actions::Lower`] action to this view (see + /// [`Self::map_without_context`]). pub fn lower(&mut self) -> &mut Self { let action = actions::Lower::default(); - self.map(&action) + self.map_without_context(&action) } /// Apply the default [`actions::Normalization`] action to this view (see - /// [`Self::map`]). + /// [`Self::map_without_context`]). pub fn normalize(&mut self) -> &mut Self { let action = actions::Normalization::default(); - self.map(&action) + self.map_without_context(&action) } - /// Apply the [`actions::Replacement`] action to this view (see [`Self::map`]). + /// Apply the [`actions::Replacement`] action to this view (see + /// [`Self::map_with_context`]). /// /// ## Errors /// /// For why and how this can fail, see the implementation of [`TryFrom`] for /// [`actions::Replacement`]. - pub fn replace(&mut self, replacement: String) -> Result<&mut Self, ReplacementCreationError> { + pub fn replace(&mut self, replacement: String) -> Result<&mut Self, ActionError> { let action = actions::Replacement::try_from(replacement)?; - Ok(self.map(&action)) + self.map_with_context(&action) } - /// Apply the [`actions::Symbols`] action to this view (see [`Self::map`]). + /// Apply the [`actions::Symbols`] action to this view (see + /// [`Self::map_without_context`]). 
#[cfg(feature = "symbols")] pub fn symbols(&mut self) -> &mut Self { let action = actions::Symbols::default(); - self.map(&action) + self.map_without_context(&action) } - /// Apply the [`actions::SymbolsInversion`] action to this view (see [`Self::map`]). + /// Apply the [`actions::SymbolsInversion`] action to this view (see + /// [`Self::map_without_context`]). #[cfg(feature = "symbols")] pub fn invert_symbols(&mut self) -> &mut Self { let action = actions::SymbolsInversion::default(); - self.map(&action) + self.map_without_context(&action) } - /// Apply the default [`actions::Titlecase`] action to this view (see [`Self::map`]). + /// Apply the default [`actions::Titlecase`] action to this view (see + /// [`Self::map_without_context`]). pub fn titlecase(&mut self) -> &mut Self { let action = actions::Titlecase::default(); - self.map(&action) + self.map_without_context(&action) } - /// Apply the default [`actions::Upper`] action to this view (see [`Self::map`]). + /// Apply the default [`actions::Upper`] action to this view (see + /// [`Self::map_without_context`]). pub fn upper(&mut self) -> &mut Self { let action = actions::Upper::default(); - self.map(&action) + self.map_without_context(&action) } } @@ -196,7 +235,7 @@ impl<'viewee> ScopedViewBuilder<'viewee> { #[must_use] pub fn new(input: &'viewee str) -> Self { Self { - scopes: ROScopes(vec![ROScope(In(input))]), + scopes: ROScopes(vec![ROScope(In(input, None))]), viewee: input, } } @@ -216,7 +255,7 @@ impl<'viewee> ScopedViewBuilder<'viewee> { /// See [`DosFix`]. 
fn apply_dos_line_endings_fix(&mut self) { if self.scopes.0.windows(2).any(|window| match window { - [ROScope(In(left)), ROScope(Out(right))] => { + [ROScope(In(left, _)), ROScope(Out(right))] => { left.ends_with('\r') && right.starts_with('\n') } _ => false, @@ -261,7 +300,7 @@ impl<'viewee> ScopedViewBuilder<'viewee> { } match scope { - ROScope(In(s)) => { + ROScope(In(s, _)) => { let mut new_scopes = scoper.scope(s); new_scopes.0.retain(|s| !s.is_empty()); new.extend(new_scopes.0);