Merge pull request #332 from mgeisler/word-separator-trait

Add `WordSeparator` trait to allow customizing how words are found in a line of text
mgeisler · May 2, 2021 · 48b9480 · 48b9480
2 parents ab57499 + 14faf1f
commit 48b9480
Show file tree

Hide file tree

Showing 9 changed files with 383 additions and 211 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,29 @@
 This file lists the most important changes made in each release of
 `textwrap`.
 
+## Unreleased
+
+This is a major feature release which adds a new generic type
+parameter to the `Options` struct. This new parameter lets you specify
+how words are found in the text.
+
+Common usages of textwrap stay unchanged, but if you previously
+spelled out the full type for `Options`, you now need to take the extra
+type parameter into account. This means that
+
+```rust
+let options: Options<HyphenSplitter> = Options::new(80);
+```
+
+needs to change to
+
+```rust
+let options: Options<AsciiSpace, HyphenSplitter> = Options::new(80);
+```
+
+You won’t see any change if you call `wrap` directly with a width or
+with an `Options` constructed on the fly.
+
 ## Version 0.13.4 (2021-02-23)
 
 This release removes `println!` statements which was left behind in

diff --git a/examples/interactive.rs b/examples/interactive.rs
@@ -21,7 +21,8 @@ mod unix_only {
     use termion::{color, cursor, style};
     #[cfg(feature = "smawk")]
     use textwrap::core::WrapAlgorithm::{FirstFit, OptimalFit};
-    use textwrap::{wrap, HyphenSplitter, NoHyphenation, Options, WordSplitter};
+    use textwrap::{wrap, AsciiSpace, Options};
+    use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};
 
     #[cfg(feature = "hyphenation")]
     use hyphenation::{Language, Load, Standard};
@@ -56,7 +57,7 @@ mod unix_only {
 
     fn draw_text<'a>(
         text: &str,
-        options: &Options<'a>,
+        options: &Options<'a, AsciiSpace, Box<dyn WordSplitter>>,
         splitter_label: &str,
         stdout: &mut RawTerminal<io::Stdout>,
     ) -> Result<(), io::Error> {
@@ -256,7 +257,8 @@ mod unix_only {
         }
 
         let mut label = labels.pop().unwrap();
-        let mut options: Options = Options::new(35).splitter(Box::new(HyphenSplitter));
+        let mut options =
+            Options::new(35).splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
         options.break_words = false;
         options.splitter = splitters.pop().unwrap();
 

diff --git a/examples/layout.rs b/examples/layout.rs
@@ -1,12 +1,12 @@
-use textwrap::{wrap, HyphenSplitter, Options};
+use textwrap::{wrap, HyphenSplitter, Options, WordSplitter};
 
 fn main() {
     let example = "Memory safety without garbage collection. \
                    Concurrency without data races. \
                    Zero-cost abstractions.";
     let mut prev_lines = vec![];
 
-    let mut options: Options = Options::new(0).splitter(Box::new(HyphenSplitter));
+    let mut options = Options::new(0).splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
     #[cfg(feature = "hyphenation")]
     {
         use hyphenation::Load;

diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs
@@ -160,7 +160,7 @@ pub fn draw_wrapped_text(
 
     let mut lineno = 0;
     for line in text.split('\n') {
-        let words = core::find_words(line);
+        let words = options.word_separator.find_words(line);
         let split_words = core::split_words(words, &options);
 
         let canvas_words = split_words

diff --git a/src/core.rs b/src/core.rs
@@ -8,8 +8,9 @@
 //! something:
 //!
 //! 1. Split your input into [`Fragment`]s. These are abstract blocks
-//!    of text or content which can be wrapped into lines. You can use
-//!    [`find_words`] to do this for text.
+//!    of text or content which can be wrapped into lines. See
+//!    [`WordSeparator`](crate::WordSeparator) for how to do this for
+//!    text.
 //!
 //! 2. Potentially split your fragments into smaller pieces. This
 //!    allows you to implement things like hyphenation. If wrapping
@@ -48,7 +49,7 @@ const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';
 /// `chars` provide the following characters. The `chars` will be
 /// modified if `ch` is the start of an ANSI escape sequence.
 #[inline]
-fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
+pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
     if ch == CSI.0 && chars.next() == Some(CSI.1) {
         // We have found the start of an ANSI escape code, typically
         // used for colored terminal text. We skip until we find a
@@ -326,51 +327,6 @@ impl Fragment for Word<'_> {
     }
 }
 
-/// Split line into words separated by regions of `' '` characters.
-///
-/// # Examples
-///
-/// ```
-/// use textwrap::core::{find_words, Fragment, Word};
-/// let words = find_words("Hello World!").collect::<Vec<_>>();
-/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
-/// assert_eq!(words[0].width(), 5);
-/// assert_eq!(words[0].whitespace_width(), 1);
-/// assert_eq!(words[0].penalty_width(), 0);
-/// ```
-pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
-    let mut start = 0;
-    let mut in_whitespace = false;
-    let mut char_indices = line.char_indices();
-
-    std::iter::from_fn(move || {
-        // for (idx, ch) in char_indices does not work, gives this
-        // error:
-        //
-        // > cannot move out of `char_indices`, a captured variable in
-        // > an `FnMut` closure
-        #[allow(clippy::while_let_on_iterator)]
-        while let Some((idx, ch)) = char_indices.next() {
-            if in_whitespace && ch != ' ' {
-                let word = Word::from(&line[start..idx]);
-                start = idx;
-                in_whitespace = ch == ' ';
-                return Some(word);
-            }
-
-            in_whitespace = ch == ' ';
-        }
-
-        if start < line.len() {
-            let word = Word::from(&line[start..]);
-            start = line.len();
-            return Some(word);
-        }
-
-        None
-    })
-}
-
 /// Split words into smaller words according to the split points given
 /// by `options`.
 ///
@@ -398,9 +354,9 @@ pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
 ///     vec![Word::from("foo-bar")]
 /// );
 /// ```
-pub fn split_words<'a, I, S>(
+pub fn split_words<'a, I, T, S>(
     words: I,
-    options: &'a Options<'a, S>,
+    options: &'a Options<'a, T, S>,
 ) -> impl Iterator<Item = Word<'a>>
 where
     I: IntoIterator<Item = Word<'a>>,
@@ -510,7 +466,8 @@ pub enum WrapAlgorithm {
 /// a large gap:
 ///
 /// ```
-/// use textwrap::core::{find_words, wrap_first_fit, Word};
+/// use textwrap::core::{wrap_first_fit, Word};
+/// use textwrap::{AsciiSpace, WordSeparator};
 ///
 /// // Helper to convert wrapped lines to a Vec<String>.
 /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
@@ -520,7 +477,7 @@ pub enum WrapAlgorithm {
 /// }
 ///
 /// let text = "These few words will unfortunately not wrap nicely.";
-/// let words = find_words(text).collect::<Vec<_>>();
+/// let words = AsciiSpace.find_words(text).collect::<Vec<_>>();
 /// assert_eq!(lines_to_strings(wrap_first_fit(&words, |_| 15)),
 ///            vec!["These few words",
 ///                 "will",  // <-- short line
@@ -748,83 +705,6 @@ mod tests {
         assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
     }
 
-    #[test]
-    fn find_words_empty() {
-        assert_iter_eq!(find_words(""), vec![]);
-    }
-
-    #[test]
-    fn find_words_single_word() {
-        assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]);
-    }
-
-    #[test]
-    fn find_words_two_words() {
-        assert_iter_eq!(
-            find_words("foo bar"),
-            vec![Word::from("foo "), Word::from("bar")]
-        );
-    }
-
-    #[test]
-    fn find_words_multiple_words() {
-        assert_iter_eq!(
-            find_words("foo bar baz"),
-            vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
-        );
-    }
-
-    #[test]
-    fn find_words_whitespace() {
-        assert_iter_eq!(find_words("    "), vec![Word::from("    ")]);
-    }
-
-    #[test]
-    fn find_words_inter_word_whitespace() {
-        assert_iter_eq!(
-            find_words("foo   bar"),
-            vec![Word::from("foo   "), Word::from("bar")]
-        )
-    }
-
-    #[test]
-    fn find_words_trailing_whitespace() {
-        assert_iter_eq!(find_words("foo   "), vec![Word::from("foo   ")]);
-    }
-
-    #[test]
-    fn find_words_leading_whitespace() {
-        assert_iter_eq!(
-            find_words("   foo"),
-            vec![Word::from("   "), Word::from("foo")]
-        );
-    }
-
-    #[test]
-    fn find_words_multi_column_char() {
-        assert_iter_eq!(
-            find_words("\u{1f920}"), // cowboy emoji 🤠
-            vec![Word::from("\u{1f920}")]
-        );
-    }
-
-    #[test]
-    fn find_words_hyphens() {
-        assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]);
-        assert_iter_eq!(
-            find_words("foo- bar"),
-            vec![Word::from("foo- "), Word::from("bar")]
-        );
-        assert_iter_eq!(
-            find_words("foo - bar"),
-            vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
-        );
-        assert_iter_eq!(
-            find_words("foo -bar"),
-            vec![Word::from("foo "), Word::from("-bar")]
-        );
-    }
-
     #[test]
     fn split_words_no_words() {
         assert_iter_eq!(split_words(vec![], &Options::new(80)), vec![]);