Use u16 instead of usize for line width

Using an `usize` for the line width causes problems for the optimal-fit wrapping algorithm. The problem is that the algorithm works with integer values formed by (line_width - text_width)**2 When `line_width` is near `usize::max_value()`, this computation will overflow an `usize`. By limiting the line width to an `u16`, we can do the internal computations with `u64` and avoid the overflows.
mgeisler · Dec 12, 2021 · 1dfc75a · 1dfc75a
1 parent 6710e5f
commit 1dfc75a
Show file tree

Hide file tree

Showing 13 changed files with 302 additions and 103 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -134,12 +134,18 @@ jobs:
       - name: Build fuzz targets
         run: cargo fuzz build
 
-      - name: Fuzz test
+      - name: Fuzz test fill_first_fit
         run: cargo fuzz run fill_first_fit -- -max_total_time=30
 
-      - name: Minimize fuzz corpus
+      - name: Fuzz test fill_optimal_fit
+        run: cargo fuzz run fill_optimal_fit -- -max_total_time=30
+
+      - name: Minimize fill_first_fit corpus
         run: cargo fuzz cmin fill_first_fit
 
+      - name: Minimize fill_optimal_fit corpus
+        run: cargo fuzz cmin fill_optimal_fit
+
   binary-sizes:
     name: Compute binary sizes
     runs-on: ubuntu-latest

diff --git a/benches/linear.rs b/benches/linear.rs
@@ -9,7 +9,7 @@ use criterion::{criterion_group, criterion_main};
 
 use lipsum::lipsum_words_from_seed;
 
-const LINE_LENGTH: usize = 60;
+const LINE_LENGTH: u16 = 60;
 
 /// Generate a lorem ipsum text with the given number of characters.
 fn lorem_ipsum(length: usize) -> String {

diff --git a/examples/layout.rs b/examples/layout.rs
@@ -7,7 +7,8 @@ fn main() {
                    Zero-cost abstractions.";
     let mut prev_lines = vec![];
 
-    let mut options = Options::new(0).word_splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
+    let mut options =
+        Options::new(0).word_splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
     #[cfg(feature = "hyphenation")]
     {
         use hyphenation::Load;
@@ -21,9 +22,9 @@ fn main() {
         let lines = wrap(example, &options);
         if lines != prev_lines {
             let title = format!(" Width: {} ", width);
-            println!(".{:-^1$}.", title, width + 2);
+            println!(".{:-^1$}.", title, (width + 2).into());
             for line in &lines {
-                println!("| {:1$} |", line, width);
+                println!("| {:1$} |", line, width.into());
             }
             prev_lines = lines;
         }

diff --git a/examples/wasm/src/lib.rs b/examples/wasm/src/lib.rs
@@ -145,22 +145,22 @@ impl<'a> CanvasWord<'a> {
     }
 }
 
-const PRECISION: usize = 10;
+const PRECISION: u16 = 10;
 
 impl textwrap::core::Fragment for CanvasWord<'_> {
     #[inline]
-    fn width(&self) -> usize {
-        (self.width * PRECISION as f64) as usize
+    fn width(&self) -> u16 {
+        (self.width * PRECISION as f64) as u16
     }
 
     #[inline]
-    fn whitespace_width(&self) -> usize {
-        (self.whitespace_width * PRECISION as f64) as usize
+    fn whitespace_width(&self) -> u16 {
+        (self.whitespace_width * PRECISION as f64) as u16
     }
 
     #[inline]
-    fn penalty_width(&self) -> usize {
-        (self.penalty_width * PRECISION as f64) as usize
+    fn penalty_width(&self) -> u16 {
+        (self.penalty_width * PRECISION as f64) as u16
     }
 }
 
@@ -250,22 +250,22 @@ pub enum WasmWrapAlgorithm {
 #[wasm_bindgen]
 #[derive(Copy, Clone, Debug, Default)]
 pub struct WasmOptimalFit {
-    pub nline_penalty: i32,
-    pub overflow_penalty: i32,
-    pub short_last_line_fraction: usize,
-    pub short_last_line_penalty: i32,
-    pub hyphen_penalty: i32,
+    pub nline_penalty: u32,
+    pub overflow_penalty: u32,
+    pub short_last_line_fraction: u32,
+    pub short_last_line_penalty: u32,
+    pub hyphen_penalty: u32,
 }
 
 #[wasm_bindgen]
 impl WasmOptimalFit {
     #[wasm_bindgen(constructor)]
     pub fn new(
-        nline_penalty: i32,
-        overflow_penalty: i32,
-        short_last_line_fraction: usize,
-        short_last_line_penalty: i32,
-        hyphen_penalty: i32,
+        nline_penalty: u32,
+        overflow_penalty: u32,
+        short_last_line_fraction: u32,
+        short_last_line_penalty: u32,
+        hyphen_penalty: u32,
     ) -> WasmOptimalFit {
         WasmOptimalFit {
             nline_penalty,
@@ -292,7 +292,7 @@ impl Into<OptimalFit> for WasmOptimalFit {
 #[wasm_bindgen]
 #[derive(Copy, Clone, Debug)]
 pub struct WasmOptions {
-    pub width: usize,
+    pub width: u16,
     pub break_words: bool,
     pub word_separator: WasmWordSeparator,
     pub word_splitter: WasmWordSplitter,
@@ -304,7 +304,7 @@ pub struct WasmOptions {
 impl WasmOptions {
     #[wasm_bindgen(constructor)]
     pub fn new(
-        width: usize,
+        width: u16,
         break_words: bool,
         word_separator: WasmWordSeparator,
         word_splitter: WasmWordSplitter,

diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -10,21 +10,34 @@ edition = "2018"
 cargo-fuzz = true
 
 [dependencies]
-libfuzzer-sys = "0.3"
+arbitrary = { version = "1", features = ["derive"] }
+libfuzzer-sys = "0.4"
 textwrap = { path = ".." }
 
 # Prevent this from interfering with workspaces
 [workspace]
 members = ["."]
 
+[[bin]]
+name = "fill_first_fit"
+path = "fuzz_targets/fill_first_fit.rs"
+test = false
+doc = false
+
+[[bin]]
+name = "wrap_first_fit"
+path = "fuzz_targets/wrap_first_fit.rs"
+test = false
+doc = false
+
 [[bin]]
 name = "fill_optimal_fit"
 path = "fuzz_targets/fill_optimal_fit.rs"
 test = false
 doc = false
 
 [[bin]]
-name = "fill_first_fit"
-path = "fuzz_targets/fill_first_fit.rs"
+name = "wrap_optimal_fit"
+path = "fuzz_targets/wrap_optimal_fit.rs"
 test = false
 doc = false
diff --git a/fuzz/fuzz_targets/fill_first_fit.rs b/fuzz/fuzz_targets/fill_first_fit.rs
@@ -3,7 +3,7 @@ use libfuzzer_sys::fuzz_target;
 use textwrap::wrap_algorithms;
 use textwrap::Options;
 
-fuzz_target!(|input: (String, usize)| {
+fuzz_target!(|input: (String, u16)| {
     let options = Options::new(input.1).wrap_algorithm(wrap_algorithms::FirstFit);
     let _ = textwrap::fill(&input.0, &options);
 });
diff --git a/fuzz/fuzz_targets/fill_optimal_fit.rs b/fuzz/fuzz_targets/fill_optimal_fit.rs
@@ -3,7 +3,7 @@ use libfuzzer_sys::fuzz_target;
 use textwrap::wrap_algorithms;
 use textwrap::Options;
 
-fuzz_target!(|input: (String, usize)| {
+fuzz_target!(|input: (String, u16)| {
     let options = Options::new(input.1).wrap_algorithm(wrap_algorithms::OptimalFit::default());
     let _ = textwrap::fill(&input.0, &options);
 });
diff --git a/fuzz/fuzz_targets/wrap_first_fit.rs b/fuzz/fuzz_targets/wrap_first_fit.rs
@@ -0,0 +1,31 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+use textwrap::core;
+use textwrap::wrap_algorithms::wrap_first_fit;
+
+#[derive(Debug, Eq, PartialEq)]
+struct Word(u16);
+
+impl core::Fragment for Word {
+    fn width(&self) -> u16 {
+        self.0
+    }
+    fn whitespace_width(&self) -> u16 {
+        1
+    }
+    fn penalty_width(&self) -> u16 {
+        0
+    }
+}
+
+fuzz_target!(|input: (u16, Vec<u16>)| {
+    let width = input.0;
+    let word_widths = input.1;
+    // The words here can easily have a combined width which doesn't
+    // fit in an u16.
+    let words = word_widths
+        .iter()
+        .map(|&width| Word(width))
+        .collect::<Vec<Word>>();
+    let _ = wrap_first_fit(&words, &[width]);
+});
diff --git a/fuzz/fuzz_targets/wrap_optimal_fit.rs b/fuzz/fuzz_targets/wrap_optimal_fit.rs
@@ -0,0 +1,54 @@
+#![no_main]
+use arbitrary::Arbitrary;
+use libfuzzer_sys::fuzz_target;
+use textwrap::core;
+use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit};
+
+#[derive(Arbitrary, Debug)]
+struct Penalties {
+    nline_penalty: u32,
+    overflow_penalty: u32,
+    short_last_line_fraction: u64,
+    short_last_line_penalty: u32,
+    hyphen_penalty: u32,
+}
+
+impl Into<OptimalFit> for Penalties {
+    fn into(self) -> OptimalFit {
+        OptimalFit {
+            nline_penalty: self.nline_penalty,
+            overflow_penalty: self.overflow_penalty,
+            short_last_line_fraction: std::cmp::max(1, self.short_last_line_fraction),
+            short_last_line_penalty: self.short_last_line_penalty,
+            hyphen_penalty: self.hyphen_penalty,
+        }
+    }
+}
+
+#[derive(Arbitrary, Debug, Eq, PartialEq)]
+struct Word {
+    width: u16,
+    whitespace_width: u16,
+    penalty_width: u16,
+}
+
+impl core::Fragment for Word {
+    fn width(&self) -> u16 {
+        self.width
+    }
+    fn whitespace_width(&self) -> u16 {
+        self.whitespace_width
+    }
+    fn penalty_width(&self) -> u16 {
+        self.penalty_width
+    }
+}
+
+fuzz_target!(|input: (u16, Vec<Word>, Penalties)| {
+    let width = input.0;
+    // The words here have a combined width much longer than an u16.
+    let words = input.1;
+    let penalties = input.2.into();
+
+    let _ = wrap_optimal_fit(&words, &[width], &penalties);
+});
diff --git a/src/core.rs b/src/core.rs
@@ -35,6 +35,8 @@
 //! the functionality here is not sufficient or if you have ideas for
 //! improving it. We would love to hear from you!
 
+use std::convert::TryInto;
+
 /// The CSI or “Control Sequence Introducer” introduces an ANSI escape
 /// sequence. This is typically used for colored text and will be
 /// ignored when computing the text width.
@@ -62,8 +64,10 @@ pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, char
 
 #[cfg(feature = "unicode-width")]
 #[inline]
-fn ch_width(ch: char) -> usize {
-    unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
+fn ch_width(ch: char) -> u16 {
+    unicode_width::UnicodeWidthChar::width(ch)
+        .and_then(|w| w.try_into().ok())
+        .unwrap_or(0)
 }
 
 /// First character which [`ch_width`] will classify as double-width.
@@ -73,7 +77,7 @@ const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';
 
 #[cfg(not(feature = "unicode-width"))]
 #[inline]
-fn ch_width(ch: char) -> usize {
+fn ch_width(ch: char) -> u16 {
     if ch < DOUBLE_WIDTH_CUTOFF {
         1
     } else {
@@ -173,7 +177,7 @@ fn ch_width(ch: char) -> usize {
 /// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
 /// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
 /// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
-pub fn display_width(text: &str) -> usize {
+pub fn display_width(text: &str) -> u16 {
     let mut chars = text.chars();
     let mut width = 0;
     while let Some(ch) = chars.next() {
@@ -197,15 +201,15 @@ pub fn display_width(text: &str) -> usize {
 /// the displayed width of each part, which this trait provides.
 pub trait Fragment: std::fmt::Debug {
     /// Displayed width of word represented by this fragment.
-    fn width(&self) -> usize;
+    fn width(&self) -> u16;
 
     /// Displayed width of the whitespace that must follow the word
     /// when the word is not at the end of a line.
-    fn whitespace_width(&self) -> usize;
+    fn whitespace_width(&self) -> u16;
 
     /// Displayed width of the penalty that must be inserted if the
     /// word falls at the end of a line.
-    fn penalty_width(&self) -> usize;
+    fn penalty_width(&self) -> u16;
 }
 
 /// A piece of wrappable text, including any trailing whitespace.
@@ -221,7 +225,7 @@ pub struct Word<'a> {
     /// Penalty string to insert if the word falls at the end of a line.
     pub penalty: &'a str,
     // Cached width in columns.
-    pub(crate) width: usize,
+    pub(crate) width: u16,
 }
 
 impl std::ops::Deref for Word<'_> {
@@ -260,7 +264,7 @@ impl<'a> Word<'a> {
     ///     vec![Word::from("Hel"), Word::from("lo!  ")]
     /// );
     /// ```
-    pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
+    pub fn break_apart<'b>(&'b self, line_width: u16) -> impl Iterator<Item = Word<'a>> + 'b {
         let mut char_indices = self.word.char_indices();
         let mut offset = 0;
         let mut width = 0;
@@ -304,22 +308,22 @@ impl<'a> Word<'a> {
 
 impl Fragment for Word<'_> {
     #[inline]
-    fn width(&self) -> usize {
+    fn width(&self) -> u16 {
         self.width
     }
 
     // We assume the whitespace consist of ' ' only. This allows us to
     // compute the display width in constant time.
     #[inline]
-    fn whitespace_width(&self) -> usize {
-        self.whitespace.len()
+    fn whitespace_width(&self) -> u16 {
+        self.whitespace.len().try_into().expect("Width exceeds u16")
     }
 
     // We assume the penalty is `""` or `"-"`. This allows us to
     // compute the display width in constant time.
     #[inline]
-    fn penalty_width(&self) -> usize {
-        self.penalty.len()
+    fn penalty_width(&self) -> u16 {
+        self.penalty.len().try_into().expect("Width exceeds u16")
     }
 }
 
@@ -328,7 +332,7 @@ impl Fragment for Word<'_> {
 /// This simply calls [`Word::break_apart`] on words that are too
 /// wide. This means that no extra `'-'` is inserted, the word is
 /// simply broken into smaller pieces.
-pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
+pub fn break_words<'a, I>(words: I, line_width: u16) -> Vec<Word<'a>>
 where
     I: IntoIterator<Item = Word<'a>>,
 {