diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index da84414b..16d9b33a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -134,12 +134,18 @@ jobs: - name: Build fuzz targets run: cargo fuzz build - - name: Fuzz test + - name: Fuzz test fill_first_fit run: cargo fuzz run fill_first_fit -- -max_total_time=30 - - name: Minimize fuzz corpus + - name: Fuzz test fill_optimal_fit + run: cargo fuzz run fill_optimal_fit -- -max_total_time=30 + + - name: Minimize fill_first_fit corpus run: cargo fuzz cmin fill_first_fit + - name: Minimize fill_optimal_fit corpus + run: cargo fuzz cmin fill_optimal_fit + binary-sizes: name: Compute binary sizes runs-on: ubuntu-latest diff --git a/benches/linear.rs b/benches/linear.rs index 2d8ca28b..4d0bcde0 100644 --- a/benches/linear.rs +++ b/benches/linear.rs @@ -9,7 +9,7 @@ use criterion::{criterion_group, criterion_main}; use lipsum::lipsum_words_from_seed; -const LINE_LENGTH: usize = 60; +const LINE_LENGTH: u32 = 60; /// Generate a lorem ipsum text with the given number of characters. fn lorem_ipsum(length: usize) -> String { diff --git a/examples/layout.rs b/examples/layout.rs index 518ae1bc..177d125a 100644 --- a/examples/layout.rs +++ b/examples/layout.rs @@ -7,7 +7,8 @@ fn main() { Zero-cost abstractions."; let mut prev_lines = vec![]; - let mut options = Options::new(0).word_splitter(Box::new(HyphenSplitter) as Box); + let mut options = + Options::new(0).word_splitter(Box::new(HyphenSplitter) as Box); #[cfg(feature = "hyphenation")] { use hyphenation::Load; @@ -21,9 +22,9 @@ fn main() { let lines = wrap(example, &options); if lines != prev_lines { let title = format!(" Width: {} ", width); - println!(".{:-^1$}.", title, width + 2); + println!(".{:-^1$}.", title, width as usize + 2); for line in &lines { - println!("| {:1$} |", line, width); + println!("| {:1$} |", line, width as usize); } prev_lines = lines; } diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs index 50fb405d..fe5cf5b1 100644 --- a/examples/wasm/src/lib.rs +++ b/examples/wasm/src/lib.rs @@ -145,22 +145,22 @@ impl<'a> CanvasWord<'a> { } } -const PRECISION: usize = 10; +const PRECISION: u32 = 10; impl textwrap::core::Fragment for CanvasWord<'_> { #[inline] - fn width(&self) -> usize { - (self.width * PRECISION as f64) as usize + fn width(&self) -> u32 { + (self.width * PRECISION as f64) as u32 } #[inline] - fn whitespace_width(&self) -> usize { - (self.whitespace_width * PRECISION as f64) as usize + fn whitespace_width(&self) -> u32 { + (self.whitespace_width * PRECISION as f64) as u32 } #[inline] - fn penalty_width(&self) -> usize { - (self.penalty_width * PRECISION as f64) as usize + fn penalty_width(&self) -> u32 { + (self.penalty_width * PRECISION as f64) as u32 } } @@ -250,22 +250,22 @@ pub enum WasmWrapAlgorithm { #[wasm_bindgen] #[derive(Copy, Clone, Debug, Default)] pub struct WasmOptimalFit { - pub nline_penalty: i32, - pub overflow_penalty: i32, - pub short_last_line_fraction: usize, - pub short_last_line_penalty: i32, - pub hyphen_penalty: i32, + pub nline_penalty: u32, + pub overflow_penalty: u32, + pub short_last_line_fraction: u32, + pub short_last_line_penalty: u32, + pub hyphen_penalty: u32, } #[wasm_bindgen] impl WasmOptimalFit { #[wasm_bindgen(constructor)] pub fn new( - nline_penalty: i32, - overflow_penalty: i32, - short_last_line_fraction: usize, - short_last_line_penalty: i32, - hyphen_penalty: i32, + nline_penalty: u32, + overflow_penalty: u32, + short_last_line_fraction: u32, + short_last_line_penalty: u32, + hyphen_penalty: u32, ) -> WasmOptimalFit { WasmOptimalFit { nline_penalty, @@ -292,7 +292,7 @@ impl Into for WasmOptimalFit { #[wasm_bindgen] #[derive(Copy, Clone, Debug)] pub struct WasmOptions { - pub width: usize, + pub width: u32, pub break_words: bool, pub word_separator: WasmWordSeparator, pub word_splitter: WasmWordSplitter, @@ -304,7 +304,7 @@ pub struct WasmOptions { impl WasmOptions { #[wasm_bindgen(constructor)] pub fn new( - width: usize, + width: u32, break_words: bool, word_separator: WasmWordSeparator, word_splitter: WasmWordSplitter, diff --git a/examples/wasm/www/index.html b/examples/wasm/www/index.html index 50fe3bf8..d9296bf0 100644 --- a/examples/wasm/www/index.html +++ b/examples/wasm/www/index.html @@ -97,7 +97,7 @@

Textwrap WebAssembly Demo

- +
@@ -109,13 +109,13 @@

Textwrap WebAssembly Demo

- +
- +
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 193b1f4d..0d4a5655 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -10,13 +10,26 @@ edition = "2018" cargo-fuzz = true [dependencies] -libfuzzer-sys = "0.3" +arbitrary = { version = "1", features = ["derive"] } +libfuzzer-sys = "0.4" textwrap = { path = ".." } # Prevent this from interfering with workspaces [workspace] members = ["."] +[[bin]] +name = "fill_first_fit" +path = "fuzz_targets/fill_first_fit.rs" +test = false +doc = false + +[[bin]] +name = "wrap_first_fit" +path = "fuzz_targets/wrap_first_fit.rs" +test = false +doc = false + [[bin]] name = "fill_optimal_fit" path = "fuzz_targets/fill_optimal_fit.rs" @@ -24,7 +37,7 @@ test = false doc = false [[bin]] -name = "fill_first_fit" -path = "fuzz_targets/fill_first_fit.rs" +name = "wrap_optimal_fit" +path = "fuzz_targets/wrap_optimal_fit.rs" test = false doc = false diff --git a/fuzz/fuzz_targets/fill_first_fit.rs b/fuzz/fuzz_targets/fill_first_fit.rs index ca319b0a..7a951861 100644 --- a/fuzz/fuzz_targets/fill_first_fit.rs +++ b/fuzz/fuzz_targets/fill_first_fit.rs @@ -3,7 +3,7 @@ use libfuzzer_sys::fuzz_target; use textwrap::wrap_algorithms; use textwrap::Options; -fuzz_target!(|input: (String, usize)| { +fuzz_target!(|input: (String, u32)| { let options = Options::new(input.1).wrap_algorithm(wrap_algorithms::FirstFit); let _ = textwrap::fill(&input.0, &options); }); diff --git a/fuzz/fuzz_targets/fill_optimal_fit.rs b/fuzz/fuzz_targets/fill_optimal_fit.rs index 32784c0c..09249446 100644 --- a/fuzz/fuzz_targets/fill_optimal_fit.rs +++ b/fuzz/fuzz_targets/fill_optimal_fit.rs @@ -3,7 +3,7 @@ use libfuzzer_sys::fuzz_target; use textwrap::wrap_algorithms; use textwrap::Options; -fuzz_target!(|input: (String, usize)| { +fuzz_target!(|input: (String, u32)| { let options = Options::new(input.1).wrap_algorithm(wrap_algorithms::OptimalFit::default()); let _ = textwrap::fill(&input.0, &options); }); diff --git a/fuzz/fuzz_targets/wrap_first_fit.rs b/fuzz/fuzz_targets/wrap_first_fit.rs new file mode 100644 index 00000000..d43a6a96 --- /dev/null +++ b/fuzz/fuzz_targets/wrap_first_fit.rs @@ -0,0 +1,25 @@ +#![no_main] +use arbitrary::Arbitrary; +use libfuzzer_sys::fuzz_target; +use textwrap::core; +use textwrap::wrap_algorithms::wrap_first_fit; + +#[derive(Arbitrary, Debug, Eq, PartialEq)] +struct Word { + width: u32, + whitespace_width: u32, + penalty_width: u32, +} + +#[rustfmt::skip] +impl core::Fragment for Word { + fn width(&self) -> u32 { self.width } + fn whitespace_width(&self) -> u32 { self.whitespace_width } + fn penalty_width(&self) -> u32 { self.penalty_width } +} + +fuzz_target!(|input: (u32, Vec)| { + let width = input.0; + let words = input.1; + let _ = wrap_first_fit(&words, &[width]); +}); diff --git a/fuzz/fuzz_targets/wrap_optimal_fit.rs b/fuzz/fuzz_targets/wrap_optimal_fit.rs new file mode 100644 index 00000000..d15535a4 --- /dev/null +++ b/fuzz/fuzz_targets/wrap_optimal_fit.rs @@ -0,0 +1,47 @@ +#![no_main] +use arbitrary::Arbitrary; +use libfuzzer_sys::fuzz_target; +use textwrap::core; +use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit}; + +#[derive(Arbitrary, Debug)] +struct Penalties { + nline_penalty: u32, + overflow_penalty: u32, + short_last_line_fraction: u32, + short_last_line_penalty: u32, + hyphen_penalty: u32, +} + +impl Into for Penalties { + fn into(self) -> OptimalFit { + OptimalFit { + nline_penalty: self.nline_penalty, + overflow_penalty: self.overflow_penalty, + short_last_line_fraction: std::cmp::max(1, self.short_last_line_fraction), + short_last_line_penalty: self.short_last_line_penalty, + hyphen_penalty: self.hyphen_penalty, + } + } +} + +#[derive(Arbitrary, Debug, Eq, PartialEq)] +struct Word { + width: u32, + whitespace_width: u32, + penalty_width: u32, +} + +#[rustfmt::skip] +impl core::Fragment for Word { + fn width(&self) -> u32 { self.width } + fn whitespace_width(&self) -> u32 { self.whitespace_width } + fn penalty_width(&self) -> u32 { self.penalty_width } +} + +fuzz_target!(|input: (u32, Vec, Penalties)| { + let width = input.0; + let words = input.1; + let penalties = input.2.into(); + let _ = wrap_optimal_fit(&words, &[width], &penalties); +}); diff --git a/src/core.rs b/src/core.rs index 1ea18f81..79123ff9 100644 --- a/src/core.rs +++ b/src/core.rs @@ -35,6 +35,8 @@ //! the functionality here is not sufficient or if you have ideas for //! improving it. We would love to hear from you! +use std::convert::TryInto; + /// The CSI or “Control Sequence Introducer” introduces an ANSI escape /// sequence. This is typically used for colored text and will be /// ignored when computing the text width. @@ -62,8 +64,10 @@ pub(crate) fn skip_ansi_escape_sequence>(ch: char, char #[cfg(feature = "unicode-width")] #[inline] -fn ch_width(ch: char) -> usize { - unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) +fn ch_width(ch: char) -> u32 { + unicode_width::UnicodeWidthChar::width(ch) + .and_then(|w| w.try_into().ok()) + .unwrap_or(0) } /// First character which [`ch_width`] will classify as double-width. @@ -73,7 +77,7 @@ const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}'; #[cfg(not(feature = "unicode-width"))] #[inline] -fn ch_width(ch: char) -> usize { +fn ch_width(ch: char) -> u32 { if ch < DOUBLE_WIDTH_CUTOFF { 1 } else { @@ -173,7 +177,7 @@ fn ch_width(ch: char) -> usize { /// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence /// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters /// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html -pub fn display_width(text: &str) -> usize { +pub fn display_width(text: &str) -> u32 { let mut chars = text.chars(); let mut width = 0; while let Some(ch) = chars.next() { @@ -197,15 +201,15 @@ pub fn display_width(text: &str) -> usize { /// the displayed width of each part, which this trait provides. pub trait Fragment: std::fmt::Debug { /// Displayed width of word represented by this fragment. - fn width(&self) -> usize; + fn width(&self) -> u32; /// Displayed width of the whitespace that must follow the word /// when the word is not at the end of a line. - fn whitespace_width(&self) -> usize; + fn whitespace_width(&self) -> u32; /// Displayed width of the penalty that must be inserted if the /// word falls at the end of a line. - fn penalty_width(&self) -> usize; + fn penalty_width(&self) -> u32; } /// A piece of wrappable text, including any trailing whitespace. @@ -221,7 +225,7 @@ pub struct Word<'a> { /// Penalty string to insert if the word falls at the end of a line. pub penalty: &'a str, // Cached width in columns. - pub(crate) width: usize, + pub(crate) width: u32, } impl std::ops::Deref for Word<'_> { @@ -260,7 +264,7 @@ impl<'a> Word<'a> { /// vec![Word::from("Hel"), Word::from("lo! ")] /// ); /// ``` - pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator> + 'b { + pub fn break_apart<'b>(&'b self, line_width: u32) -> impl Iterator> + 'b { let mut char_indices = self.word.char_indices(); let mut offset = 0; let mut width = 0; @@ -304,22 +308,22 @@ impl<'a> Word<'a> { impl Fragment for Word<'_> { #[inline] - fn width(&self) -> usize { + fn width(&self) -> u32 { self.width } // We assume the whitespace consist of ' ' only. This allows us to // compute the display width in constant time. #[inline] - fn whitespace_width(&self) -> usize { - self.whitespace.len() + fn whitespace_width(&self) -> u32 { + self.whitespace.len().try_into().expect("Width exceeds u32") } // We assume the penalty is `""` or `"-"`. This allows us to // compute the display width in constant time. #[inline] - fn penalty_width(&self) -> usize { - self.penalty.len() + fn penalty_width(&self) -> u32 { + self.penalty.len().try_into().expect("Width exceeds u32") } } @@ -328,7 +332,7 @@ impl Fragment for Word<'_> { /// This simply calls [`Word::break_apart`] on words that are too /// wide. This means that no extra `'-'` is inserted, the word is /// simply broken into smaller pieces. -pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec> +pub fn break_words<'a, I>(words: I, line_width: u32) -> Vec> where I: IntoIterator>, { diff --git a/src/lib.rs b/src/lib.rs index 090313f4..3e5ff665 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -262,7 +262,7 @@ pub struct Options< WordSplit = Box, > { /// The width in columns at which the text will be wrapped. - pub width: usize, + pub width: u32, /// Indentation used for the first line of output. See the /// [`Options::initial_indent`] method. pub initial_indent: &'a str, @@ -307,7 +307,7 @@ where } } -impl<'a> From +impl<'a> From for Options< 'a, DefaultWrapAlgorithm!(), @@ -315,7 +315,7 @@ impl<'a> From word_splitters::HyphenSplitter, > { - fn from(width: usize) -> Self { + fn from(width: u32) -> Self { Options::new(width) } } @@ -356,7 +356,7 @@ impl<'a> /// Note that the default word separator and wrap algorithms /// changes based on the available Cargo features. The best /// available algorithms are used by default. - pub const fn new(width: usize) -> Self { + pub const fn new(width: u32) -> Self { Options::with_word_splitter(width, word_splitters::HyphenSplitter) } @@ -391,7 +391,7 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// # use textwrap::Options; /// # use textwrap::word_splitters::{NoHyphenation, HyphenSplitter}; /// # const word_splitter: NoHyphenation = NoHyphenation; - /// # const width: usize = 80; + /// # const width: u32 = 80; /// # let actual = Options::with_word_splitter(width, word_splitter); /// # let expected = /// Options::new(width).word_splitter(word_splitter) @@ -410,7 +410,7 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// ``` /// use textwrap::Options; /// use textwrap::word_splitters::{HyphenSplitter, NoHyphenation, WordSplitter}; - /// # const width: usize = 80; + /// # const width: u32 = 80; /// /// // The type annotation is important, otherwise it will be not a trait object /// let mut options: Options<_, _, Box> @@ -434,7 +434,7 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// use textwrap::word_splitters::HyphenSplitter; /// use textwrap::word_separators::AsciiSpace; /// use textwrap::wrap_algorithms::FirstFit; - /// # const width: usize = 80; + /// # const width: u32 = 80; /// /// # #[cfg(all(not(feature = "smawk"), not(feature = "unicode-linebreak")))] { /// const FOO: Options = @@ -442,7 +442,7 @@ impl<'a, WordSplit> Options<'a, DefaultWrapAlgorithm!(), DefaultWordSeparator!() /// static BAR: Options = FOO; /// # } /// ``` - pub const fn with_word_splitter(width: usize, word_splitter: WordSplit) -> Self { + pub const fn with_word_splitter(width: u32, word_splitter: WordSplit) -> Self { Options { width, initial_indent: "", @@ -645,8 +645,8 @@ impl<'a, WrapAlgo, WordSep, WordSplit> Options<'a, WrapAlgo, WordSep, WordSplit> /// **Note:** Only available when the `terminal_size` Cargo feature is /// enabled. #[cfg(feature = "terminal_size")] -pub fn termwidth() -> usize { - terminal_size::terminal_size().map_or(80, |(terminal_size::Width(w), _)| w.into()) +pub fn termwidth() -> u32 { + terminal_size::terminal_size().map_or(80, |(terminal_size::Width(w), _)| u32::from(w)) } /// Fill a line of text at a given width. @@ -1200,6 +1200,7 @@ where Opt: Into>, { assert!(columns > 0); + assert!(columns < u32::max_value() as usize); let mut options = total_width_or_options.into(); @@ -1207,11 +1208,11 @@ where .width .saturating_sub(core::display_width(left_gap)) .saturating_sub(core::display_width(right_gap)) - .saturating_sub(core::display_width(middle_gap) * (columns - 1)); + .saturating_sub(core::display_width(middle_gap) * (columns as u32 - 1)); - let column_width = std::cmp::max(inner_width / columns, 1); - options.width = column_width; - let last_column_padding = " ".repeat(inner_width % column_width); + let column_width = std::cmp::max(inner_width as usize / columns, 1); + options.width = column_width as u32; + let last_column_padding = " ".repeat(inner_width as usize % column_width); let wrapped_lines = wrap(text, options); let lines_per_column = wrapped_lines.len() / columns + usize::from(wrapped_lines.len() % columns > 0); @@ -1222,7 +1223,9 @@ where match wrapped_lines.get(line_no + column_no * lines_per_column) { Some(column_line) => { line.push_str(column_line); - line.push_str(&" ".repeat(column_width - core::display_width(column_line))); + line.push_str( + &" ".repeat(column_width - core::display_width(column_line) as usize), + ); } None => { line.push_str(&" ".repeat(column_width)); @@ -1295,7 +1298,7 @@ where /// Please see the [`linear` /// benchmark](https://github.com/mgeisler/textwrap/blob/master/benches/linear.rs) /// for details. -pub fn fill_inplace(text: &mut String, width: usize) { +pub fn fill_inplace(text: &mut String, width: u32) { use word_separators::WordSeparator; let mut indices = Vec::new(); @@ -1342,7 +1345,7 @@ mod tests { #[test] fn options_agree_with_usize() { - let opt_usize = Options::from(42_usize); + let opt_usize = Options::from(42_u32); let opt_options = Options::new(42); assert_eq!(opt_usize.width, opt_options.width); @@ -1393,7 +1396,9 @@ mod tests { #[test] fn max_width() { - assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]); + assert_eq!(wrap("foo bar", u32::max_value()), vec!["foo bar"]); + let long_text = "foobar baz foobar baz foobar baz foobar baz foobar baz"; + assert_eq!(wrap(&long_text, u32::max_value()), vec![long_text]); } #[test] diff --git a/src/wrap_algorithms.rs b/src/wrap_algorithms.rs index c216e708..2773f44b 100644 --- a/src/wrap_algorithms.rs +++ b/src/wrap_algorithms.rs @@ -37,7 +37,7 @@ pub trait WrapAlgorithm: WrapAlgorithmClone + std::fmt::Debug { /// can be used to implement hanging indentation. /// /// Please see the implementors of the trait for examples. - fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>; + fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [u32]) -> Vec<&'b [Word<'a>]>; } // The internal `WrapAlgorithmClone` trait is allows us to implement @@ -63,7 +63,7 @@ impl Clone for Box { } impl WrapAlgorithm for Box { - fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { + fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [u32]) -> Vec<&'b [Word<'a>]> { use std::ops::Deref; self.deref().wrap(words, line_widths) } @@ -92,7 +92,7 @@ impl Default for FirstFit { impl WrapAlgorithm for FirstFit { #[inline] - fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { + fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [u32]) -> Vec<&'b [Word<'a>]> { wrap_first_fit(words, line_widths) } } @@ -178,15 +178,15 @@ impl WrapAlgorithm for FirstFit { /// #[derive(Debug)] /// struct Task<'a> { /// name: &'a str, -/// hours: usize, // Time needed to complete task. -/// sweep: usize, // Time needed for a quick sweep after task during the day. -/// cleanup: usize, // Time needed for full cleanup if day ends with this task. +/// hours: u32, // Time needed to complete task. +/// sweep: u32, // Time needed for a quick sweep after task during the day. +/// cleanup: u32, // Time needed for full cleanup if day ends with this task. /// } /// /// impl Fragment for Task<'_> { -/// fn width(&self) -> usize { self.hours } -/// fn whitespace_width(&self) -> usize { self.sweep } -/// fn penalty_width(&self) -> usize { self.cleanup } +/// fn width(&self) -> u32 { self.hours } +/// fn whitespace_width(&self) -> u32 { self.sweep } +/// fn penalty_width(&self) -> u32 { self.cleanup } /// } /// /// // The morning tasks @@ -205,14 +205,14 @@ impl WrapAlgorithm for FirstFit { /// // Fill tasks into days, taking `day_length` into account. The /// // output shows the hours worked per day along with the names of /// // the tasks for that day. -/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> { +/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: u32) -> Vec<(u32, Vec<&'a str>)> { /// let mut days = Vec::new(); /// // Assign tasks to days. The assignment is a vector of slices, /// // with a slice per day. /// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]); /// for day in assigned_days.iter() { /// let last = day.last().unwrap(); -/// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum(); +/// let work_hours: u32 = day.iter().map(|t| t.hours + t.sweep).sum(); /// let names = day.iter().map(|t| t.name).collect::>(); /// days.push((work_hours - last.sweep + last.cleanup, names)); /// } @@ -247,26 +247,65 @@ impl WrapAlgorithm for FirstFit { /// knows how long each step takes :-) pub fn wrap_first_fit<'a, 'b, T: Fragment>( fragments: &'a [T], - line_widths: &'b [usize], + line_widths: &'b [u32], ) -> Vec<&'a [T]> { // The final line width is used for all remaining lines. let default_line_width = line_widths.last().copied().unwrap_or(0); let mut lines = Vec::new(); let mut start = 0; - let mut width = 0; + let mut width: u64 = 0; for (idx, fragment) in fragments.iter().enumerate() { - let line_width = line_widths + let line_width: u64 = line_widths .get(lines.len()) .copied() - .unwrap_or(default_line_width); - if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { + .unwrap_or(default_line_width) + .into(); + if width + fragment.width() as u64 + fragment.penalty_width() as u64 > line_width + && idx > start + { lines.push(&fragments[start..idx]); start = idx; width = 0; } - width += fragment.width() + fragment.whitespace_width(); + width += fragment.width() as u64 + fragment.whitespace_width() as u64; } lines.push(&fragments[start..]); lines } + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, Eq, PartialEq)] + struct Word(u32); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> u32 { self.0 } + fn whitespace_width(&self) -> u32 { 1 } + fn penalty_width(&self) -> u32 { 0 } + } + + #[test] + fn wrap_string_longer_than_u32() { + let words = vec![ + Word(10_000), + Word(20_000), + Word(30_000), + Word(40_000), + Word(50_000), + ]; + + assert_eq!( + wrap_first_fit(&words, &[45_000]), + &[ + vec![Word(10_000), Word(20_000)], + vec![Word(30_000)], + vec![Word(40_000)], + vec![Word(50_000)], + ] + ); + } +} diff --git a/src/wrap_algorithms/optimal_fit.rs b/src/wrap_algorithms/optimal_fit.rs index 74bd6d31..d9b5798b 100644 --- a/src/wrap_algorithms/optimal_fit.rs +++ b/src/wrap_algorithms/optimal_fit.rs @@ -24,7 +24,7 @@ use crate::wrap_algorithms::WrapAlgorithm; pub struct OptimalFit { /// Per-line penalty. This is added for every line, which makes it /// expensive to output more lines than the minimum required. - pub nline_penalty: i32, + pub nline_penalty: u32, /// Per-character cost for lines that overflow the target line width. /// @@ -40,24 +40,25 @@ pub struct OptimalFit { /// /// let short = "foo "; /// let long = "x".repeat(50); + /// let length = (short.len() + long.len()) as u32; /// let fragments = vec![Word::from(short), Word::from(&long)]; /// let penalties = OptimalFit::new(); /// /// // Perfect fit, both words are on a single line with no overflow. - /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len()], &penalties); + /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The words no longer fit, yet we get a single line back. While /// // the cost of overflow (`1 * 2500`) is the same as the cost of the /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty` /// // which makes it cheaper to overflow than to use two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 1], &penalties); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1], &penalties); /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]); /// /// // The cost of overflow would be 2 * 2500, whereas the cost of /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 = /// // 3401`. We therefore get two lines. - /// let wrapped = wrap_optimal_fit(&fragments, &[short.len() + long.len() - 2], &penalties); + /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2], &penalties); /// assert_eq!(wrapped, vec![&[Word::from(short)], /// &[Word::from(&long)]]); /// ``` @@ -67,7 +68,7 @@ pub struct OptimalFit { /// character. If it overflows by more than one character, the /// overflow penalty will quickly outgrow the cost of the gap, as /// seen above. - pub overflow_penalty: i32, + pub overflow_penalty: u32, /// When should the a single word on the last line be considered /// "too short"? @@ -123,15 +124,15 @@ pub struct OptimalFit { /// "penalty."]); /// } /// ``` - pub short_last_line_fraction: usize, + pub short_last_line_fraction: u32, /// Penalty for a last line with a single short word. /// /// Set this to zero if you do not want to penalize short last lines. - pub short_last_line_penalty: i32, + pub short_last_line_penalty: u32, /// Penalty for lines ending with a hyphen. - pub hyphen_penalty: i32, + pub hyphen_penalty: u32, } impl OptimalFit { @@ -160,7 +161,7 @@ impl Default for OptimalFit { impl WrapAlgorithm for OptimalFit { #[inline] - fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]> { + fn wrap<'a, 'b>(&self, words: &'b [Word<'a>], line_widths: &'b [u32]) -> Vec<&'b [Word<'a>]> { wrap_optimal_fit(words, line_widths, self) } } @@ -268,16 +269,16 @@ impl LineNumbers { /// enabled. pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], - line_widths: &'b [usize], + line_widths: &'b [u32], penalties: &'b OptimalFit, ) -> Vec<&'a [T]> { // The final line width is used for all remaining lines. let default_line_width = line_widths.last().copied().unwrap_or(0); let mut widths = Vec::with_capacity(fragments.len() + 1); - let mut width = 0; + let mut width: u128 = 0; widths.push(width); for fragment in fragments { - width += fragment.width() + fragment.whitespace_width(); + width += fragment.width() as u128 + fragment.whitespace_width() as u128; widths.push(width); } @@ -290,43 +291,45 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( .get(line_number) .copied() .unwrap_or(default_line_width); - let target_width = std::cmp::max(1, line_width); + let target_width = std::cmp::max(1, line_width as u128); // Compute the width of a line spanning fragments[i..j] in // constant time. We need to adjust widths[j] by subtracting - // the whitespace of fragment[j-i] and then add the penalty. - let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() - + fragments[j - 1].penalty_width(); + // the whitespace of fragment[j-1] and then add the penalty. + let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width() as u128 + + fragments[j - 1].penalty_width() as u128; // We compute cost of the line containing fragments[i..j]. We // start with values[i].1, which is the optimal cost for // breaking before fragments[i]. // // First, every extra line cost NLINE_PENALTY. - let mut cost = minima[i].1 + penalties.nline_penalty; + let mut cost = minima[i].1 + penalties.nline_penalty as u128; // Next, we add a penalty depending on the line length. if line_width > target_width { // Lines that overflow get a hefty penalty. - let overflow = (line_width - target_width) as i32; - cost += overflow * penalties.overflow_penalty; + let overflow = line_width - target_width; + cost += overflow * penalties.overflow_penalty as u128; } else if j < fragments.len() { // Other lines (except for the last line) get a milder // penalty which depend on the size of the gap. - let gap = (target_width - line_width) as i32; + let gap = target_width - line_width; cost += gap * gap; - } else if i + 1 == j && line_width < target_width / penalties.short_last_line_fraction { + } else if i + 1 == j + && line_width < target_width / penalties.short_last_line_fraction as u128 + { // The last line can have any size gap, but we do add a // penalty if the line is very short (typically because it // contains just a single word). - cost += penalties.short_last_line_penalty; + cost += penalties.short_last_line_penalty as u128; } // Finally, we discourage hyphens. if fragments[j - 1].penalty_width() > 0 { // TODO: this should use a penalty value from the fragment // instead. - cost += penalties.hyphen_penalty; + cost += penalties.hyphen_penalty as u128; } cost @@ -346,3 +349,44 @@ pub fn wrap_optimal_fit<'a, 'b, T: Fragment>( lines.reverse(); lines } + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Debug, Eq, PartialEq)] + struct Word(u32); + + #[rustfmt::skip] + impl Fragment for Word { + fn width(&self) -> u32 { self.0 } + fn whitespace_width(&self) -> u32 { 1 } + fn penalty_width(&self) -> u32 { 0 } + } + + #[test] + fn wrap_string_longer_than_u32() { + let words = vec![ + Word(10_000), + Word(20_000), + Word(30_000), + Word(40_000), + Word(50_000), + ]; + + let penalties = &OptimalFit { + overflow_penalty: 10_000_000, + ..OptimalFit::default() + }; + + assert_eq!( + wrap_optimal_fit(&words, &[45_000], &penalties), + &[ + vec![Word(10_000), Word(20_000)], + vec![Word(30_000)], + vec![Word(40_000)], + vec![Word(50_000)], + ] + ); + } +}