Skip to content

Commit

Permalink
Move WordSplitter and split_words to new word_splitters module
Browse files Browse the repository at this point in the history
This moves all “word splitting” or “hyphenation” functionality into
its own module. The three main traits now all have their own module,
and the `core` module is much slimmer.
  • Loading branch information
mgeisler committed May 30, 2021
1 parent c1e871f commit c432fa6
Show file tree
Hide file tree
Showing 8 changed files with 270 additions and 240 deletions.
11 changes: 6 additions & 5 deletions examples/interactive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ mod unix_only {
use termion::raw::{IntoRawMode, RawTerminal};
use termion::screen::AlternateScreen;
use termion::{color, cursor, style};
use textwrap::{word_separators, wrap_algorithms};
use textwrap::{word_separators, word_splitters, wrap_algorithms};
use textwrap::{wrap, Options};
use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};

#[cfg(feature = "hyphenation")]
use hyphenation::{Language, Load, Standard};
Expand Down Expand Up @@ -60,7 +59,7 @@ mod unix_only {
'a,
Box<dyn wrap_algorithms::WrapAlgorithm>,
Box<dyn word_separators::WordSeparator>,
Box<dyn WordSplitter>,
Box<dyn word_splitters::WordSplitter>,
>,
splitter_label: &str,
stdout: &mut RawTerminal<io::Stdout>,
Expand Down Expand Up @@ -238,8 +237,10 @@ mod unix_only {
#[cfg(feature = "smawk")]
wrap_algorithms.push(Box::new(wrap_algorithms::OptimalFit));

let mut splitters: Vec<Box<dyn WordSplitter>> =
vec![Box::new(HyphenSplitter), Box::new(NoHyphenation)];
let mut splitters: Vec<Box<dyn word_splitters::WordSplitter>> = vec![
Box::new(word_splitters::HyphenSplitter),
Box::new(word_splitters::NoHyphenation),
];
let mut splitter_labels: Vec<String> =
splitters.iter().map(|s| format!("{:?}", s)).collect();

Expand Down
3 changes: 2 additions & 1 deletion examples/layout.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use textwrap::{wrap, HyphenSplitter, Options, WordSplitter};
use textwrap::word_splitters::{HyphenSplitter, WordSplitter};
use textwrap::{wrap, Options};

fn main() {
let example = "Memory safety without garbage collection. \
Expand Down
2 changes: 1 addition & 1 deletion examples/wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ pub fn draw_wrapped_text(
let mut lineno = 0;
for line in text.split('\n') {
let words = word_separator.find_words(line);
let split_words = core::split_words(words, &word_splitter);
let split_words = core::word_splitters::split_words(words, &word_splitter);

let canvas_words = split_words
.flat_map(|word| {
Expand Down
158 changes: 5 additions & 153 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
//! how to do this for text.
//!
//! 2. Potentially split your fragments into smaller pieces. This
//! allows you to implement things like hyphenation. If wrapping
//! text, [`split_words`] can help you do this.
//! allows you to implement things like hyphenation. If you are
//! wrapping text represented as a sequence of [`Word`]s, then
//! [`split_words`](crate::word_splitters::split_words) can help
//! you do this.
//!
//! 3. Potentially break apart fragments that are still too large to
//! fit on a single line. This is implemented in [`break_words`].
Expand All @@ -33,8 +35,6 @@
//! the functionality here is not sufficient or if you have ideas for
//! improving it. We would love to hear from you!
use crate::WordSplitter;

/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence. This is typically used for colored text and will be
/// ignored when computing the text width.
Expand Down Expand Up @@ -221,7 +221,7 @@ pub struct Word<'a> {
/// Penalty string to insert if the word falls at the end of a line.
pub penalty: &'a str,
// Cached width in columns.
width: usize,
pub(crate) width: usize,
}

impl std::ops::Deref for Word<'_> {
Expand Down Expand Up @@ -323,70 +323,6 @@ impl Fragment for Word<'_> {
}
}

/// Break every word into smaller words at the split points reported
/// by `word_splitter`.
///
/// All words are split, no matter how short they are. Splitting
/// (which includes automatic hyphenation) is deliberately kept
/// separate from the later word wrapping step.
///
/// Every piece which ends at a split point has empty trailing
/// whitespace. Its penalty is `"-"`, unless the text up to the split
/// point already ends with a `'-'`, in which case the penalty is
/// empty. The final piece of a word takes over the whitespace and
/// penalty of the original word.
///
/// # Examples
///
/// ```
/// use textwrap::core::{split_words, Word};
/// use textwrap::{NoHyphenation, HyphenSplitter};
///
/// assert_eq!(
///     split_words(vec![Word::from("foo-bar")], &HyphenSplitter).collect::<Vec<_>>(),
///     vec![Word::from("foo-"), Word::from("bar")]
/// );
///
/// // The NoHyphenation splitter ignores the '-':
/// assert_eq!(
///     split_words(vec![Word::from("foo-bar")], &NoHyphenation).collect::<Vec<_>>(),
///     vec![Word::from("foo-bar")]
/// );
/// ```
pub fn split_words<'a, I, WordSplit>(
    words: I,
    word_splitter: &'a WordSplit,
) -> impl Iterator<Item = Word<'a>>
where
    I: IntoIterator<Item = Word<'a>>,
    WordSplit: WordSplitter,
{
    words.into_iter().flat_map(move |word| {
        // Byte offset in `word` where the next piece begins.
        let mut start = 0;
        let mut points = word_splitter.split_points(&word).into_iter();
        std::iter::from_fn(move || match points.next() {
            // Another split point: emit the piece leading up to it.
            Some(end) => {
                let penalty = if word[..end].ends_with('-') {
                    ""
                } else {
                    "-"
                };
                let piece = &word.word[start..end];
                start = end;
                Some(Word {
                    word: piece,
                    width: display_width(piece),
                    whitespace: "",
                    penalty,
                })
            }
            // Split points are exhausted: emit the remainder once. The
            // `start == 0` case makes sure a word without any split
            // points (including an empty one) is still yielded.
            None if start < word.word.len() || start == 0 => {
                let rest = &word.word[start..];
                // Move past the end of the word so that this arm can
                // never match again for the same word.
                start = word.word.len() + 1;
                Some(Word {
                    word: rest,
                    width: display_width(rest),
                    whitespace: word.whitespace,
                    penalty: word.penalty,
                })
            }
            None => None,
        })
    })
}

/// Forcibly break words wider than `line_width` into smaller words.
///
/// This simply calls [`Word::break_apart`] on words that are too
Expand All @@ -410,18 +346,10 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::HyphenSplitter;

#[cfg(feature = "unicode-width")]
use unicode_width::UnicodeWidthChar;

// Like assert_eq!, but the left expression is an iterator.
//
// The iterator is collected into a `Vec` before it is compared with
// the right expression. A trailing comma is accepted, just like
// with assert_eq!, so multi-line invocations can end in a comma.
macro_rules! assert_iter_eq {
    ($left:expr, $right:expr $(,)?) => {
        assert_eq!($left.collect::<Vec<_>>(), $right);
    };
}

#[test]
fn skip_ansi_escape_sequence_works() {
let blue_text = "\u{1b}[34mHello\u{1b}[0m";
Expand Down Expand Up @@ -503,80 +431,4 @@ mod tests {
fn display_width_emojis() {
assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
}

#[test]
fn split_words_no_words() {
    // Splitting an empty list of words must produce no words at all.
    let no_words = vec![];
    assert_iter_eq!(split_words(no_words, &HyphenSplitter), vec![]);
}

#[test]
fn split_words_empty_word() {
    // A whitespace-only input must come back as a single, identical word.
    let input = vec![Word::from(" ")];
    let expected = vec![Word::from(" ")];
    assert_iter_eq!(split_words(input, &HyphenSplitter), expected);
}

#[test]
fn split_words_single_word() {
    // A word without any hyphen must come back as a single, identical word.
    let input = vec![Word::from("foobar")];
    let expected = vec![Word::from("foobar")];
    assert_iter_eq!(split_words(input, &HyphenSplitter), expected);
}

#[test]
fn split_words_hyphen_splitter() {
    // The word is split after the hyphen, which stays with the first piece.
    let input = vec![Word::from("foo-bar")];
    let expected = vec![Word::from("foo-"), Word::from("bar")];
    assert_iter_eq!(split_words(input, &HyphenSplitter), expected);
}

#[test]
fn split_words_adds_penalty() {
    // Splitter which reports a single split point at byte offset 3,
    // regardless of the word it is given.
    #[derive(Clone, Debug)]
    struct FixedSplitPoint;
    impl WordSplitter for FixedSplitPoint {
        fn split_points(&self, _: &str) -> Vec<usize> {
            vec![3]
        }
    }

    // Expected piece: three columns wide and without trailing whitespace.
    fn piece(word: &'static str, penalty: &'static str) -> Word<'static> {
        Word {
            word,
            width: 3,
            whitespace: "",
            penalty,
        }
    }

    // No hyphen before the split point: the first piece gets a "-" penalty.
    let plain = vec![Word::from("foobar")];
    assert_iter_eq!(
        split_words(plain.into_iter(), &FixedSplitPoint),
        vec![piece("foo", "-"), piece("bar", "")]
    );

    // The text before the split point already ends with a hyphen: no
    // penalty is added.
    let hyphenated = vec![Word::from("fo-bar")];
    assert_iter_eq!(
        split_words(hyphenated.into_iter(), &FixedSplitPoint),
        vec![piece("fo-", ""), piece("bar", "")]
    );
}
}
Loading

0 comments on commit c432fa6

Please sign in to comment.