Skip to content

Commit

Permalink
Merge pull request #332 from mgeisler/word-separator-trait
Browse files Browse the repository at this point in the history
Add `WordSeparator` trait to allow customizing how words are found in a line of text
  • Loading branch information
mgeisler authored May 2, 2021
2 parents ab57499 + 14faf1f commit 48b9480
Show file tree
Hide file tree
Showing 9 changed files with 383 additions and 211 deletions.
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
This file lists the most important changes made in each release of
`textwrap`.

## Unreleased

This is a major feature release which adds a new generic type
parameter to the `Options` struct. This new parameter lets you specify
how words are found in the text.

Common usage of textwrap stays unchanged, but if you previously
spelled out the full type for `Options`, you now need to take the extra
type parameter into account. This means that

```rust
let options: Options<HyphenSplitter> = Options::new(80);
```

needs to change to

```rust
let options: Options<AsciiSpace, HyphenSplitter> = Options::new(80);
```

You won’t see any change if you call `wrap` directly with a width or
with an `Options` constructed on the fly.

## Version 0.13.4 (2021-02-23)

This release removes `println!` statements which were left behind in
Expand Down
8 changes: 5 additions & 3 deletions examples/interactive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ mod unix_only {
use termion::{color, cursor, style};
#[cfg(feature = "smawk")]
use textwrap::core::WrapAlgorithm::{FirstFit, OptimalFit};
use textwrap::{wrap, HyphenSplitter, NoHyphenation, Options, WordSplitter};
use textwrap::{wrap, AsciiSpace, Options};
use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};

#[cfg(feature = "hyphenation")]
use hyphenation::{Language, Load, Standard};
Expand Down Expand Up @@ -56,7 +57,7 @@ mod unix_only {

fn draw_text<'a>(
text: &str,
options: &Options<'a>,
options: &Options<'a, AsciiSpace, Box<dyn WordSplitter>>,
splitter_label: &str,
stdout: &mut RawTerminal<io::Stdout>,
) -> Result<(), io::Error> {
Expand Down Expand Up @@ -256,7 +257,8 @@ mod unix_only {
}

let mut label = labels.pop().unwrap();
let mut options: Options = Options::new(35).splitter(Box::new(HyphenSplitter));
let mut options =
Options::new(35).splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
options.break_words = false;
options.splitter = splitters.pop().unwrap();

Expand Down
4 changes: 2 additions & 2 deletions examples/layout.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use textwrap::{wrap, HyphenSplitter, Options};
use textwrap::{wrap, HyphenSplitter, Options, WordSplitter};

fn main() {
let example = "Memory safety without garbage collection. \
Concurrency without data races. \
Zero-cost abstractions.";
let mut prev_lines = vec![];

let mut options: Options = Options::new(0).splitter(Box::new(HyphenSplitter));
let mut options = Options::new(0).splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
#[cfg(feature = "hyphenation")]
{
use hyphenation::Load;
Expand Down
2 changes: 1 addition & 1 deletion examples/wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ pub fn draw_wrapped_text(

let mut lineno = 0;
for line in text.split('\n') {
let words = core::find_words(line);
let words = options.word_separator.find_words(line);
let split_words = core::split_words(words, &options);

let canvas_words = split_words
Expand Down
138 changes: 9 additions & 129 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
//! something:
//!
//! 1. Split your input into [`Fragment`]s. These are abstract blocks
//! of text or content which can be wrapped into lines. You can use
//! [`find_words`] to do this for text.
//! of text or content which can be wrapped into lines. See
//! [`WordSeparator`](crate::WordSeparator) for how to do this for
//! text.
//!
//! 2. Potentially split your fragments into smaller pieces. This
//! allows you to implement things like hyphenation. If wrapping
Expand Down Expand Up @@ -48,7 +49,7 @@ const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e';
/// `chars` provide the following characters. The `chars` will be
/// modified if `ch` is the start of an ANSI escape sequence.
#[inline]
fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
if ch == CSI.0 && chars.next() == Some(CSI.1) {
// We have found the start of an ANSI escape code, typically
// used for colored terminal text. We skip until we find a
Expand Down Expand Up @@ -326,51 +327,6 @@ impl Fragment for Word<'_> {
}
}

/// Break `line` into [`Word`]s, using runs of `' '` characters as the
/// separators.
///
/// Each yielded word is a slice of `line` that extends up to (but not
/// including) the first non-space character following a run of
/// spaces, so the spaces after a word stay attached to that word.
///
/// # Examples
///
/// ```
/// use textwrap::core::{find_words, Fragment, Word};
/// let words = find_words("Hello World!").collect::<Vec<_>>();
/// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
/// assert_eq!(words[0].width(), 5);
/// assert_eq!(words[0].whitespace_width(), 1);
/// assert_eq!(words[0].penalty_width(), 0);
/// ```
pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
    // Byte offset where the word currently being scanned begins.
    let mut word_start = 0;
    // Whether the previously seen character was a space.
    let mut prev_was_space = false;
    let mut indices = line.char_indices();

    std::iter::from_fn(move || {
        // `by_ref` borrows the captured iterator instead of moving it
        // out of the closure, so scanning resumes where the previous
        // call stopped.
        for (pos, ch) in indices.by_ref() {
            let is_space = ch == ' ';
            if prev_was_space && !is_space {
                // A space-to-non-space transition ends the current word.
                let word = Word::from(&line[word_start..pos]);
                word_start = pos;
                prev_was_space = false;
                return Some(word);
            }
            prev_was_space = is_space;
        }

        // The characters are exhausted: nothing left to emit if the
        // final word has already been handed out.
        if word_start >= line.len() {
            return None;
        }

        // Emit whatever remains as the last word, exactly once.
        let rest = Word::from(&line[word_start..]);
        word_start = line.len();
        Some(rest)
    })
}

/// Split words into smaller words according to the split points given
/// by `options`.
///
Expand Down Expand Up @@ -398,9 +354,9 @@ pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
/// vec![Word::from("foo-bar")]
/// );
/// ```
pub fn split_words<'a, I, S>(
pub fn split_words<'a, I, T, S>(
words: I,
options: &'a Options<'a, S>,
options: &'a Options<'a, T, S>,
) -> impl Iterator<Item = Word<'a>>
where
I: IntoIterator<Item = Word<'a>>,
Expand Down Expand Up @@ -510,7 +466,8 @@ pub enum WrapAlgorithm {
/// a large gap:
///
/// ```
/// use textwrap::core::{find_words, wrap_first_fit, Word};
/// use textwrap::core::{wrap_first_fit, Word};
/// use textwrap::{AsciiSpace, WordSeparator};
///
/// // Helper to convert wrapped lines to a Vec<String>.
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
Expand All @@ -520,7 +477,7 @@ pub enum WrapAlgorithm {
/// }
///
/// let text = "These few words will unfortunately not wrap nicely.";
/// let words = find_words(text).collect::<Vec<_>>();
/// let words = AsciiSpace.find_words(text).collect::<Vec<_>>();
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, |_| 15)),
/// vec!["These few words",
/// "will", // <-- short line
Expand Down Expand Up @@ -748,83 +705,6 @@ mod tests {
assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
}

// An empty line yields no words at all.
#[test]
fn find_words_empty() {
assert_iter_eq!(find_words(""), vec![]);
}

// A line without any spaces is returned as one word.
#[test]
fn find_words_single_word() {
assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]);
}

// The separating space stays attached to the first of the two words.
#[test]
fn find_words_two_words() {
assert_iter_eq!(
find_words("foo bar"),
vec![Word::from("foo "), Word::from("bar")]
);
}

// Every word except the last one carries the space that follows it.
#[test]
fn find_words_multiple_words() {
assert_iter_eq!(
find_words("foo bar baz"),
vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
);
}

// A whitespace-only line is returned as a single word made of that whitespace.
#[test]
fn find_words_whitespace() {
assert_iter_eq!(find_words(" "), vec![Word::from(" ")]);
}

// Whitespace between two words belongs to the word that precedes it.
#[test]
fn find_words_inter_word_whitespace() {
assert_iter_eq!(
find_words("foo bar"),
vec![Word::from("foo "), Word::from("bar")]
)
}

// Trailing whitespace is kept as part of the final word.
#[test]
fn find_words_trailing_whitespace() {
assert_iter_eq!(find_words("foo "), vec![Word::from("foo ")]);
}

// Leading whitespace becomes its own word, ahead of the first real word.
#[test]
fn find_words_leading_whitespace() {
assert_iter_eq!(
find_words(" foo"),
vec![Word::from(" "), Word::from("foo")]
);
}

// A lone multi-byte character (here an emoji) is returned intact as one word.
#[test]
fn find_words_multi_column_char() {
assert_iter_eq!(
find_words("\u{1f920}"), // cowboy emoji 🤠
vec![Word::from("\u{1f920}")]
);
}

// Hyphens never act as word boundaries: only `' '` separates words, so a
// hyphen stays inside its word or becomes a word of its own between spaces.
#[test]
fn find_words_hyphens() {
assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]);
assert_iter_eq!(
find_words("foo- bar"),
vec![Word::from("foo- "), Word::from("bar")]
);
assert_iter_eq!(
find_words("foo - bar"),
vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
);
assert_iter_eq!(
find_words("foo -bar"),
vec![Word::from("foo "), Word::from("-bar")]
);
}

#[test]
fn split_words_no_words() {
assert_iter_eq!(split_words(vec![], &Options::new(80)), vec![]);
Expand Down
Loading

0 comments on commit 48b9480

Please sign in to comment.