diff --git a/Cargo.toml b/Cargo.toml index 6a6d9bd7..14970331 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,11 +21,12 @@ harness = false path = "benches/linear.rs" [features] -default = ["unicode-width", "smawk"] +default = ["unicode-linebreak", "unicode-width", "smawk"] [dependencies] smawk = { version = "0.3", optional = true } terminal_size = { version = "0.1", optional = true } +unicode-linebreak = { version = "0.1", optional = true } unicode-width = { version= "0.1", optional = true } [dependencies.hyphenation] diff --git a/benches/linear.rs b/benches/linear.rs index d6486466..7a6c6868 100644 --- a/benches/linear.rs +++ b/benches/linear.rs @@ -26,10 +26,25 @@ pub fn benchmark(c: &mut Criterion) { #[cfg(feature = "smawk")] { + #[cfg(feature = "unicode-linebreak")] + { + let options = textwrap::Options::new(LINE_LENGTH) + .wrap_algorithm(textwrap::core::WrapAlgorithm::OptimalFit) + .word_separator(textwrap::UnicodeBreakProperties); + group.bench_with_input( + BenchmarkId::new("fill_optimal_fit_unicode", length), + &text, + |b, text| { + b.iter(|| textwrap::fill(text, &options)); + }, + ); + } + let options = textwrap::Options::new(LINE_LENGTH) - .wrap_algorithm(textwrap::core::WrapAlgorithm::OptimalFit); + .wrap_algorithm(textwrap::core::WrapAlgorithm::OptimalFit) + .word_separator(textwrap::AsciiSpace); group.bench_with_input( - BenchmarkId::new("fill_optimal_fit", length), + BenchmarkId::new("fill_optimal_fit_ascii", length), &text, |b, text| { b.iter(|| textwrap::fill(text, &options)); @@ -38,7 +53,8 @@ pub fn benchmark(c: &mut Criterion) { } let options = textwrap::Options::new(LINE_LENGTH) - .wrap_algorithm(textwrap::core::WrapAlgorithm::FirstFit); + .wrap_algorithm(textwrap::core::WrapAlgorithm::FirstFit) + .word_separator(textwrap::AsciiSpace); group.bench_with_input( BenchmarkId::new("fill_first_fit", length), &text, diff --git a/examples/interactive.rs b/examples/interactive.rs index 7c2e687c..3671fc92 100644 --- a/examples/interactive.rs +++ 
b/examples/interactive.rs @@ -21,7 +21,7 @@ mod unix_only { use termion::{color, cursor, style}; #[cfg(feature = "smawk")] use textwrap::core::WrapAlgorithm::{FirstFit, OptimalFit}; - use textwrap::{wrap, AsciiSpace, Options}; + use textwrap::{wrap, AsciiSpace, Options, WordSeparator}; use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter}; #[cfg(feature = "hyphenation")] @@ -57,7 +57,7 @@ mod unix_only { fn draw_text<'a>( text: &str, - options: &Options<'a, AsciiSpace, Box>, + options: &Options<'a, Box, Box>, splitter_label: &str, stdout: &mut RawTerminal, ) -> Result<(), io::Error> { @@ -257,8 +257,9 @@ mod unix_only { } let mut label = labels.pop().unwrap(); - let mut options = - Options::new(35).splitter(Box::new(HyphenSplitter) as Box); + let mut options = Options::new(35) + .splitter(Box::new(HyphenSplitter) as Box) + .word_separator(Box::new(AsciiSpace) as Box); options.break_words = false; options.splitter = splitters.pop().unwrap(); diff --git a/examples/wasm/Cargo.lock b/examples/wasm/Cargo.lock index ff400e00..852a1c80 100644 --- a/examples/wasm/Cargo.lock +++ b/examples/wasm/Cargo.lock @@ -1,5 +1,16 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + [[package]] name = "bumpalo" version = "3.6.1" @@ -52,6 +63,12 @@ dependencies = [ "cfg-if 1.0.0", ] +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + [[package]] name = "proc-macro2" version = "1.0.26" @@ -70,6 +87,23 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" + [[package]] name = "scoped-tls" version = "1.0.0" @@ -98,6 +132,7 @@ name = "textwrap" version = "0.13.4" dependencies = [ "smawk", + "unicode-linebreak", "unicode-width", ] @@ -113,6 +148,15 @@ dependencies = [ "web-sys", ] +[[package]] +name = "unicode-linebreak" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05a31f45d18a3213b918019f78fe6a73a14ab896807f0aaf5622aa0684749455" +dependencies = [ + "regex", +] + [[package]] name = "unicode-width" version = "0.1.8" diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs index 308a5543..5166bb1f 100644 --- a/examples/wasm/src/lib.rs +++ b/examples/wasm/src/lib.rs @@ -1,7 +1,7 @@ use wasm_bindgen::prelude::*; use wasm_bindgen::JsCast; -use textwrap::core; +use textwrap::{core, WordSeparator}; #[wasm_bindgen] extern "C" { diff --git a/src/core.rs b/src/core.rs 
index f227acac..109780d7 100644 --- a/src/core.rs +++ b/src/core.rs @@ -237,7 +237,7 @@ impl std::ops::Deref for Word<'_> { } impl<'a> Word<'a> { - /// Construct a new `Word`. + /// Construct a `Word` from a string. /// /// A trailing stretch of `' '` is automatically taken to be the /// whitespace part of the word. @@ -354,9 +354,9 @@ impl Fragment for Word<'_> { /// vec![Word::from("foo-bar")] /// ); /// ``` -pub fn split_words<'a, I, T, S>( +pub fn split_words<'a, I, R, S>( words: I, - options: &'a Options<'a, T, S>, + options: &'a Options<'a, R, S>, ) -> impl Iterator> where I: IntoIterator>, diff --git a/src/lib.rs b/src/lib.rs index 51d0758f..5031fdd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -130,6 +130,17 @@ //! //! These features are enabled by default: //! +//! * `unicode-linebreak`: enables finding words using the +//! [unicode-linebreak] crate, which implements the line breaking +//! algorithm described in [Unicode Standard Annex +//! #14](https://www.unicode.org/reports/tr14/). +//! +//! This feature can be disabled if you are happy to find words +//! separated by ASCII space characters only. People wrapping text +//! with emojis or East-Asian characters will most likely want +//! to enable this feature. See the [`WordSeparator`] trait for +//! details. +//! //! * `unicode-width`: enables correct width computation of non-ASCII //! characters via the [unicode-width] crate. Without this feature, //! every [`char`] is 1 column wide, except for emojis which are 2 @@ -159,6 +170,7 @@ //! * `hyphenation`: enables language-sensitive hyphenation via the //! [hyphenation] crate. See the [`WordSplitter`] trait for details. //! +//! [unicode-linebreak]: https://docs.rs/unicode-linebreak/ //! [unicode-width]: https://docs.rs/unicode-width/ //! [smawk]: https://docs.rs/smawk/ //! 
[textwrap-macros]: https://docs.rs/textwrap-macros/ @@ -181,10 +193,28 @@ mod splitting; pub use crate::splitting::{HyphenSplitter, NoHyphenation, WordSplitter}; mod word_separator; +#[cfg(feature = "unicode-linebreak")] +pub use word_separator::UnicodeBreakProperties; pub use word_separator::{AsciiSpace, WordSeparator}; pub mod core; +// This private macro lets us hide the actual WordSeparator used in +// function signatures below. +#[cfg(feature = "unicode-linebreak")] +macro_rules! DefaultWordSeparator { + () => { + UnicodeBreakProperties + }; +} + +#[cfg(not(feature = "unicode-linebreak"))] +macro_rules! DefaultWordSeparator { + () => { + AsciiSpace + }; +} + /// Holds settings for wrapping and filling text. #[derive(Debug, Clone)] pub struct Options<'a, R = Box, S = Box> { @@ -227,19 +257,21 @@ impl<'a, R: Clone, S: Clone> From<&'a Options<'a, R, S>> for Options<'a, R, S> { } } -impl<'a> From for Options<'a, AsciiSpace, HyphenSplitter> { +impl<'a> From for Options<'a, DefaultWordSeparator!(), HyphenSplitter> { fn from(width: usize) -> Self { Options::new(width) } } /// Constructors for boxed Options, specifically. -impl<'a> Options<'a, AsciiSpace, HyphenSplitter> { +impl<'a> Options<'a, DefaultWordSeparator!(), HyphenSplitter> { /// Creates a new [`Options`] with the specified width and static /// dispatch using the [`HyphenSplitter`]. 
Equivalent to /// /// ``` /// # use textwrap::{AsciiSpace, Options, HyphenSplitter, WordSplitter}; + /// # #[cfg(feature = "unicode-linebreak")] + /// # use textwrap::UnicodeBreakProperties; /// # let width = 80; /// # let actual = Options::new(width); /// # let expected = @@ -248,6 +280,9 @@ impl<'a> Options<'a, AsciiSpace, HyphenSplitter> { /// initial_indent: "", /// subsequent_indent: "", /// break_words: true, + /// #[cfg(feature = "unicode-linebreak")] + /// word_separator: UnicodeBreakProperties, + /// #[cfg(not(feature = "unicode-linebreak"))] /// word_separator: AsciiSpace, /// #[cfg(feature = "smawk")] /// wrap_algorithm: textwrap::core::WrapAlgorithm::OptimalFit, @@ -263,9 +298,9 @@ impl<'a> Options<'a, AsciiSpace, HyphenSplitter> { /// # assert_eq!(actual.wrap_algorithm, expected.wrap_algorithm); /// ``` /// - /// Note that the default wrap algorithm changes based on the - /// `smawk` Cargo feature. The best available algorithm is used by - /// default. + /// Note that the default word separator and wrap algorithm + /// change based on the available Cargo features. The best + /// available algorithm is used by default. /// /// Static dispatch means here, that the splitter is stored as-is /// and the type is known at compile-time. 
Thus the returned value @@ -293,22 +328,18 @@ impl<'a> Options<'a, AsciiSpace, HyphenSplitter> { /// // uses HyphenSplitter with static dispatch /// // the actual type: Options /// let opt = Options::new(width); - /// # let opt_coerce: Options = opt; /// /// // uses NoHyphenation with static dispatch /// // the actual type: Options /// let opt = Options::new(width).splitter(NoHyphenation); - /// # let opt_coerce: Options = opt; /// /// // uses HyphenSplitter with dynamic dispatch /// // the actual type: Options> /// let opt: Options<_> = Options::new(width).splitter(Box::new(HyphenSplitter)); - /// # let opt_coerce: Options> = opt; /// /// // uses NoHyphenation with dynamic dispatch /// // the actual type: Options> /// let opt: Options<_> = Options::new(width).splitter(Box::new(NoHyphenation)); - /// # let opt_coerce: Options> = opt; /// ``` /// /// Notice that the last two variables have the same type, despite @@ -342,12 +373,14 @@ impl<'a> Options<'a, AsciiSpace, HyphenSplitter> { } } -impl<'a, S> Options<'a, AsciiSpace, S> { +impl<'a, S> Options<'a, DefaultWordSeparator!(), S> { /// Creates a new [`Options`] with the specified width and /// splitter. 
Equivalent to /// /// ``` /// # use textwrap::{AsciiSpace, Options, NoHyphenation, HyphenSplitter}; + /// # #[cfg(feature = "unicode-linebreak")] + /// # use textwrap::UnicodeBreakProperties; /// # const splitter: NoHyphenation = NoHyphenation; /// # const width: usize = 80; /// # let actual = Options::with_splitter(width, splitter); @@ -357,6 +390,9 @@ impl<'a, S> Options<'a, AsciiSpace, S> { /// initial_indent: "", /// subsequent_indent: "", /// break_words: true, + /// #[cfg(feature = "unicode-linebreak")] + /// word_separator: UnicodeBreakProperties, + /// #[cfg(not(feature = "unicode-linebreak"))] /// word_separator: textwrap::AsciiSpace, /// #[cfg(feature = "smawk")] /// wrap_algorithm: textwrap::core::WrapAlgorithm::OptimalFit, @@ -407,7 +443,9 @@ impl<'a, S> Options<'a, AsciiSpace, S> { /// use textwrap::{HyphenSplitter, Options, AsciiSpace}; /// # const width: usize = 80; /// + /// # #[cfg(not(feature = "unicode-linebreak"))] /// const FOO: Options = Options::with_splitter(width, HyphenSplitter); + /// # #[cfg(not(feature = "unicode-linebreak"))] /// static BAR: Options = FOO; /// ``` pub const fn with_splitter(width: usize, splitter: S) -> Self { @@ -416,7 +454,7 @@ impl<'a, S> Options<'a, AsciiSpace, S> { initial_indent: "", subsequent_indent: "", break_words: true, - word_separator: AsciiSpace, + word_separator: DefaultWordSeparator!(), #[cfg(feature = "smawk")] wrap_algorithm: core::WrapAlgorithm::OptimalFit, #[cfg(not(feature = "smawk"))] @@ -553,11 +591,11 @@ impl<'a, R, S> Options<'a, R, S> { /// example: /// /// ``` - /// use textwrap::{AsciiSpace, HyphenSplitter, NoHyphenation, Options}; + /// use textwrap::{HyphenSplitter, NoHyphenation, Options}; /// // The default type returned by `new`: - /// let opt: Options = Options::new(80); + /// let opt: Options<_, HyphenSplitter> = Options::new(80); /// // Setting a different splitter changes the type - /// let opt: Options = opt.splitter(NoHyphenation); + /// let opt: Options<_, NoHyphenation> = 
opt.splitter(NoHyphenation); /// ``` /// /// [`self.splitter`]: #structfield.splitter @@ -700,7 +738,7 @@ where /// assert_eq!(options.initial_indent, "* "); /// assert_eq!(options.subsequent_indent, " "); /// ``` -pub fn unfill(text: &str) -> (String, Options<'_, AsciiSpace, HyphenSplitter>) { +pub fn unfill(text: &str) -> (String, Options<'_, DefaultWordSeparator!(), HyphenSplitter>) { let trimmed = text.trim_end_matches('\n'); let prefix_chars: &[_] = &[' ', '-', '+', '*', '>', '#', '/']; @@ -1193,9 +1231,11 @@ where /// /// Since we can only replace existing whitespace in the input with /// `'\n'`, we cannot do hyphenation nor can we split words longer -/// than the line width. Indentation is also ruled out. In other -/// words, `fill_inplace(width)` behaves as if you had called [`fill`] -/// with these options: +/// than the line width. We also need to use `AsciiSpace` as the word +/// separator since we need `' '` characters between words in order to +/// replace some of them with a `'\n'`. Indentation is also ruled out. +/// In other words, `fill_inplace(width)` behaves as if you had called +/// [`fill`] with these options: /// /// ``` /// # use textwrap::{core, AsciiSpace, Options, NoHyphenation}; @@ -1367,7 +1407,8 @@ mod tests { fn issue_129() { // The dash is an em-dash which takes up four bytes. We used // to panic since we tried to index into the character. - assert_eq!(wrap("x – x", 1), vec!["x", "–", "x"]); + let options = Options::new(1).word_separator(AsciiSpace); + assert_eq!(wrap("x – x", options), vec!["x", "–", "x"]); } #[test] @@ -1375,9 +1416,23 @@ mod tests { fn wide_character_handling() { assert_eq!(wrap("Hello, World!", 15), vec!["Hello, World!"]); assert_eq!( - wrap("Hello, World!", 15), + wrap( + "Hello, World!", + Options::new(15).word_separator(AsciiSpace) + ), vec!["Hello,", "World!"] ); + + // Wide characters are allowed to break if the + // unicode-linebreak feature is enabled. 
+ #[cfg(feature = "unicode-linebreak")] + assert_eq!( + wrap( + "Hello, World!", + Options::new(15).word_separator(UnicodeBreakProperties) + ), + vec!["Hello, W", "orld!"] + ); } #[test] @@ -1639,7 +1694,8 @@ mod tests { fn break_words_wide_characters() { // Even the poor man's version of `ch_width` counts these // characters as wide. - assert_eq!(wrap("Hello", 5), vec!["He", "ll", "o"]); + let options = Options::new(5).word_separator(AsciiSpace); + assert_eq!(wrap("Hello", options), vec!["He", "ll", "o"]); } #[test] @@ -1726,6 +1782,7 @@ mod tests { } #[test] + #[cfg(not(feature = "unicode-linebreak"))] fn cloning_works() { static OPT: Options = Options::with_splitter(80, HyphenSplitter); @@ -1883,7 +1940,7 @@ mod tests { vector.push(opt_full_type); results.push(vec!["over-", "caffinated"]); - // Actually: Options, Box> + // Actually: Options, Box> let opt_abbreviated_type = Options::new(10) .break_words(false) .splitter(Box::new(NoHyphenation) as Box) diff --git a/src/word_separator.rs b/src/word_separator.rs index 3a94a833..469c9da4 100644 --- a/src/word_separator.rs +++ b/src/word_separator.rs @@ -1,12 +1,15 @@ //! Line breaking functionality. +#[cfg(feature = "unicode-linebreak")] +use crate::core::skip_ansi_escape_sequence; use crate::core::Word; -/// Describes where a line break can occur. +/// Describes where words occur in a line of text. /// -/// The simplest approach is say that a line can end after one or more -/// ASCII spaces (`' '`). This works for Western languages without -/// emojis. +/// The simplest approach is to say that words are separated by one or +/// more ASCII spaces (`' '`). This works for Western languages +/// without emojis. A more complex approach is to use the Unicode line +/// breaking algorithm, which finds break points in non-ASCII text. 
/// /// The line breaks occur between words, please see the /// [`WordSplitter`](crate::WordSplitter) trait for options of how @@ -55,7 +58,7 @@ impl WordSeparator for Box { } } -/// Find line breaks by regions of `' '` characters. +/// Find words by splitting on regions of `' '` characters. #[derive(Clone, Copy, Debug, Default)] pub struct AsciiSpace; @@ -106,6 +109,162 @@ impl WordSeparator for AsciiSpace { } } +/// Find words using the Unicode line breaking algorithm. +#[cfg(feature = "unicode-linebreak")] +#[derive(Clone, Copy, Debug, Default)] +pub struct UnicodeBreakProperties; + +/// Split `line` into words using Unicode break properties. +/// +/// This word separator uses the Unicode line breaking algorithm +/// described in [Unicode Standard Annex +/// #14](https://www.unicode.org/reports/tr14/) to find legal places +/// to break lines. There is a small difference in that the U+002D +/// (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break: +/// to allow a line break at a hyphen, use the +/// [`HyphenSplitter`](super::HyphenSplitter). Soft hyphens are not +/// currently supported. 
+/// +/// # Examples +/// +/// Unlike [`AsciiSpace`], the Unicode line breaking algorithm will +/// find line break opportunities between some characters with no +/// intervening whitespace: +/// +/// ``` +/// #[cfg(feature = "unicode-linebreak")] { +/// use textwrap::{WordSeparator, UnicodeBreakProperties}; +/// use textwrap::core::Word; +/// +/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::>(), +/// vec![Word::from("Emojis: "), +/// Word::from("😂"), +/// Word::from("😍")]); +/// +/// assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::>(), +/// vec![Word::from("CJK: "), +/// Word::from("你"), +/// Word::from("好")]); +/// } +/// ``` +/// +/// A U+2060 (Word Joiner) character can be inserted if you want to +/// manually override the defaults and keep the characters together: +/// +/// ``` +/// #[cfg(feature = "unicode-linebreak")] { +/// use textwrap::{UnicodeBreakProperties, WordSeparator}; +/// use textwrap::core::Word; +/// +/// assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::>(), +/// vec![Word::from("Emojis: "), +/// Word::from("😂\u{2060}😍")]); +/// } +/// ``` +/// +/// The Unicode line breaking algorithm will also automatically +/// suppress line breaks around certain punctuation characters: +/// +/// ``` +/// #[cfg(feature = "unicode-linebreak")] { +/// use textwrap::{UnicodeBreakProperties, WordSeparator}; +/// use textwrap::core::Word; +/// +/// assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::>(), +/// vec![Word::from("[ foo ] "), +/// Word::from("bar !")]); +/// } +/// ``` +#[cfg(feature = "unicode-linebreak")] +impl WordSeparator for UnicodeBreakProperties { + fn find_words<'a>(&self, line: &'a str) -> Box> + 'a> { + // Construct an iterator over (original index, stripped index) + // tuples. We find the Unicode linebreaks on a stripped string, + // but we need the original indices so we can form words based on + // the original string. 
+ let mut last_stripped_idx = 0; + let mut char_indices = line.char_indices(); + let mut idx_map = std::iter::from_fn(move || match char_indices.next() { + Some((orig_idx, ch)) => { + let stripped_idx = last_stripped_idx; + if !skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) { + last_stripped_idx += ch.len_utf8(); + } + Some((orig_idx, stripped_idx)) + } + None => None, + }); + + let stripped = strip_ansi_escape_sequences(line); + let mut opportunities = unicode_linebreak::linebreaks(&stripped) + .filter(|(idx, _)| { + #[allow(clippy::match_like_matches_macro)] + match &stripped[..*idx].chars().next_back() { + // We suppress breaks at ‘-’ since we want to control + // this via the WordSplitter. + Some('-') => false, + // Soft hyphens are currently not supported since we + // require all `Word` fragments to be continuous in + // the input string. + Some(SHY) => false, + // Other breaks should be fine! + _ => true, + } + }) + .collect::>() + .into_iter(); + + // Remove final break opportunity, we will add it below using + // &line[start..]; This ensures that we correctly include a + // trailing ANSI escape sequence. + opportunities.next_back(); + + let mut start = 0; + Box::new(std::iter::from_fn(move || { + #[allow(clippy::while_let_on_iterator)] + while let Some((idx, _)) = opportunities.next() { + if let Some((orig_idx, _)) = idx_map.find(|&(_, stripped_idx)| stripped_idx == idx) + { + let word = Word::from(&line[start..orig_idx]); + start = orig_idx; + return Some(word); + } + } + + if start < line.len() { + let word = Word::from(&line[start..]); + start = line.len(); + return Some(word); + } + + None + })) + } +} + +/// Soft hyphen, also known as a “shy hyphen”. Should show up as ‘-’ +/// if a line is broken at this point, and otherwise be invisible. +/// Textwrap does not currently support breaking words at soft +/// hyphens. 
+#[cfg(feature = "unicode-linebreak")] +const SHY: char = '\u{00ad}'; + +// Strip all ANSI escape sequences from `text`. +#[cfg(feature = "unicode-linebreak")] +fn strip_ansi_escape_sequences(text: &str) -> String { + let mut result = String::with_capacity(text.len()); + + let mut chars = text.chars(); + while let Some(ch) = chars.next() { + if skip_ansi_escape_sequence(ch, &mut chars) { + continue; + } + result.push(ch); + } + + result +} + #[cfg(test)] mod tests { use super::*; @@ -208,11 +367,23 @@ mod tests { AsciiSpace.find_words(&format!("{}{}", green_hello, blue_world)), vec![Word::from(&green_hello), Word::from(&blue_world)] ); + + #[cfg(feature = "unicode-linebreak")] + assert_iter_eq!( + UnicodeBreakProperties.find_words(&format!("{}{}", green_hello, blue_world)), + vec![Word::from(&green_hello), Word::from(&blue_world)] + ); } #[test] fn ascii_space_color_inside_word() { let text = "foo\u{1b}[0m\u{1b}[32mbar\u{1b}[0mbaz"; assert_iter_eq!(AsciiSpace.find_words(&text), vec![Word::from(text)]); + + #[cfg(feature = "unicode-linebreak")] + assert_iter_eq!( + UnicodeBreakProperties.find_words(&text), + vec![Word::from(text)] + ); } } diff --git a/tests/traits.rs b/tests/traits.rs index f0501c1e..c4bda24e 100644 --- a/tests/traits.rs +++ b/tests/traits.rs @@ -1,4 +1,4 @@ -use textwrap::{AsciiSpace, HyphenSplitter, NoHyphenation, Options, WordSeparator, WordSplitter}; +use textwrap::{AsciiSpace, NoHyphenation, Options, WordSeparator, WordSplitter}; /// Cleaned up type name. fn type_name(_val: &T) -> String { @@ -9,6 +9,7 @@ fn type_name(_val: &T) -> String { } #[test] +#[cfg(not(feature = "unicode-linebreak"))] fn static_hyphensplitter() { // Inferring the full type. let options = Options::new(10); @@ -18,7 +19,7 @@ fn static_hyphensplitter() { ); // Inferring part of the type. - let options: Options<_, HyphenSplitter> = Options::new(10); + let options: Options<_, textwrap::HyphenSplitter> = Options::new(10); assert_eq!( type_name(&options), "textwrap::Options"