Skip to content

Commit

Permalink
Use u16 instead of usize for line width
Browse files Browse the repository at this point in the history
Using an `usize` for the line width causes problems for the
optimal-fit wrapping algorithm. The problem is that the algorithm
works with integer values formed by

    (line_width - text_width)**2

When `line_width` is near `usize::max_value()`, this computation will
overflow an `usize`. By limiting the line width to an `u16`, we can do
the internal computations with `u64` and avoid the overflows.
  • Loading branch information
mgeisler committed Dec 12, 2021
1 parent 6710e5f commit 1dfc75a
Show file tree
Hide file tree
Showing 13 changed files with 302 additions and 103 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,18 @@ jobs:
- name: Build fuzz targets
run: cargo fuzz build

- name: Fuzz test
- name: Fuzz test fill_first_fit
run: cargo fuzz run fill_first_fit -- -max_total_time=30

- name: Minimize fuzz corpus
- name: Fuzz test fill_optimal_fit
run: cargo fuzz run fill_optimal_fit -- -max_total_time=30

- name: Minimize fill_first_fit corpus
run: cargo fuzz cmin fill_first_fit

- name: Minimize fill_optimal_fit corpus
run: cargo fuzz cmin fill_optimal_fit

binary-sizes:
name: Compute binary sizes
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion benches/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use criterion::{criterion_group, criterion_main};

use lipsum::lipsum_words_from_seed;

const LINE_LENGTH: usize = 60;
const LINE_LENGTH: u16 = 60;

/// Generate a lorem ipsum text with the given number of characters.
fn lorem_ipsum(length: usize) -> String {
Expand Down
7 changes: 4 additions & 3 deletions examples/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ fn main() {
Zero-cost abstractions.";
let mut prev_lines = vec![];

let mut options = Options::new(0).word_splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
let mut options =
Options::new(0).word_splitter(Box::new(HyphenSplitter) as Box<dyn WordSplitter>);
#[cfg(feature = "hyphenation")]
{
use hyphenation::Load;
Expand All @@ -21,9 +22,9 @@ fn main() {
let lines = wrap(example, &options);
if lines != prev_lines {
let title = format!(" Width: {} ", width);
println!(".{:-^1$}.", title, width + 2);
println!(".{:-^1$}.", title, (width + 2).into());
for line in &lines {
println!("| {:1$} |", line, width);
println!("| {:1$} |", line, width.into());
}
prev_lines = lines;
}
Expand Down
38 changes: 19 additions & 19 deletions examples/wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,22 +145,22 @@ impl<'a> CanvasWord<'a> {
}
}

const PRECISION: usize = 10;
const PRECISION: u16 = 10;

impl textwrap::core::Fragment for CanvasWord<'_> {
#[inline]
fn width(&self) -> usize {
(self.width * PRECISION as f64) as usize
fn width(&self) -> u16 {
(self.width * PRECISION as f64) as u16
}

#[inline]
fn whitespace_width(&self) -> usize {
(self.whitespace_width * PRECISION as f64) as usize
fn whitespace_width(&self) -> u16 {
(self.whitespace_width * PRECISION as f64) as u16
}

#[inline]
fn penalty_width(&self) -> usize {
(self.penalty_width * PRECISION as f64) as usize
fn penalty_width(&self) -> u16 {
(self.penalty_width * PRECISION as f64) as u16
}
}

Expand Down Expand Up @@ -250,22 +250,22 @@ pub enum WasmWrapAlgorithm {
#[wasm_bindgen]
#[derive(Copy, Clone, Debug, Default)]
pub struct WasmOptimalFit {
pub nline_penalty: i32,
pub overflow_penalty: i32,
pub short_last_line_fraction: usize,
pub short_last_line_penalty: i32,
pub hyphen_penalty: i32,
pub nline_penalty: u32,
pub overflow_penalty: u32,
pub short_last_line_fraction: u32,
pub short_last_line_penalty: u32,
pub hyphen_penalty: u32,
}

#[wasm_bindgen]
impl WasmOptimalFit {
#[wasm_bindgen(constructor)]
pub fn new(
nline_penalty: i32,
overflow_penalty: i32,
short_last_line_fraction: usize,
short_last_line_penalty: i32,
hyphen_penalty: i32,
nline_penalty: u32,
overflow_penalty: u32,
short_last_line_fraction: u32,
short_last_line_penalty: u32,
hyphen_penalty: u32,
) -> WasmOptimalFit {
WasmOptimalFit {
nline_penalty,
Expand All @@ -292,7 +292,7 @@ impl Into<OptimalFit> for WasmOptimalFit {
#[wasm_bindgen]
#[derive(Copy, Clone, Debug)]
pub struct WasmOptions {
pub width: usize,
pub width: u16,
pub break_words: bool,
pub word_separator: WasmWordSeparator,
pub word_splitter: WasmWordSplitter,
Expand All @@ -304,7 +304,7 @@ pub struct WasmOptions {
impl WasmOptions {
#[wasm_bindgen(constructor)]
pub fn new(
width: usize,
width: u16,
break_words: bool,
word_separator: WasmWordSeparator,
word_splitter: WasmWordSplitter,
Expand Down
19 changes: 16 additions & 3 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,34 @@ edition = "2018"
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.3"
arbitrary = { version = "1", features = ["derive"] }
libfuzzer-sys = "0.4"
textwrap = { path = ".." }

# Prevent this from interfering with workspaces
[workspace]
members = ["."]

[[bin]]
name = "fill_first_fit"
path = "fuzz_targets/fill_first_fit.rs"
test = false
doc = false

[[bin]]
name = "wrap_first_fit"
path = "fuzz_targets/wrap_first_fit.rs"
test = false
doc = false

[[bin]]
name = "fill_optimal_fit"
path = "fuzz_targets/fill_optimal_fit.rs"
test = false
doc = false

[[bin]]
name = "fill_first_fit"
path = "fuzz_targets/fill_first_fit.rs"
name = "wrap_optimal_fit"
path = "fuzz_targets/wrap_optimal_fit.rs"
test = false
doc = false
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/fill_first_fit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use libfuzzer_sys::fuzz_target;
use textwrap::wrap_algorithms;
use textwrap::Options;

fuzz_target!(|input: (String, usize)| {
fuzz_target!(|input: (String, u16)| {
let options = Options::new(input.1).wrap_algorithm(wrap_algorithms::FirstFit);
let _ = textwrap::fill(&input.0, &options);
});
2 changes: 1 addition & 1 deletion fuzz/fuzz_targets/fill_optimal_fit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use libfuzzer_sys::fuzz_target;
use textwrap::wrap_algorithms;
use textwrap::Options;

fuzz_target!(|input: (String, usize)| {
fuzz_target!(|input: (String, u16)| {
let options = Options::new(input.1).wrap_algorithm(wrap_algorithms::OptimalFit::default());
let _ = textwrap::fill(&input.0, &options);
});
31 changes: 31 additions & 0 deletions fuzz/fuzz_targets/wrap_first_fit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use textwrap::core;
use textwrap::wrap_algorithms::wrap_first_fit;

#[derive(Debug, Eq, PartialEq)]
struct Word(u16);

impl core::Fragment for Word {
fn width(&self) -> u16 {
self.0
}
fn whitespace_width(&self) -> u16 {
1
}
fn penalty_width(&self) -> u16 {
0
}
}

fuzz_target!(|input: (u16, Vec<u16>)| {
let width = input.0;
let word_widths = input.1;
// The words here can easily have a combined width which doesn't
// fit in an u16.
let words = word_widths
.iter()
.map(|&width| Word(width))
.collect::<Vec<Word>>();
let _ = wrap_first_fit(&words, &[width]);
});
54 changes: 54 additions & 0 deletions fuzz/fuzz_targets/wrap_optimal_fit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#![no_main]
use arbitrary::Arbitrary;
use libfuzzer_sys::fuzz_target;
use textwrap::core;
use textwrap::wrap_algorithms::{wrap_optimal_fit, OptimalFit};

#[derive(Arbitrary, Debug)]
struct Penalties {
nline_penalty: u32,
overflow_penalty: u32,
short_last_line_fraction: u64,
short_last_line_penalty: u32,
hyphen_penalty: u32,
}

impl Into<OptimalFit> for Penalties {
fn into(self) -> OptimalFit {
OptimalFit {
nline_penalty: self.nline_penalty,
overflow_penalty: self.overflow_penalty,
short_last_line_fraction: std::cmp::max(1, self.short_last_line_fraction),
short_last_line_penalty: self.short_last_line_penalty,
hyphen_penalty: self.hyphen_penalty,
}
}
}

#[derive(Arbitrary, Debug, Eq, PartialEq)]
struct Word {
width: u16,
whitespace_width: u16,
penalty_width: u16,
}

impl core::Fragment for Word {
fn width(&self) -> u16 {
self.width
}
fn whitespace_width(&self) -> u16 {
self.whitespace_width
}
fn penalty_width(&self) -> u16 {
self.penalty_width
}
}

fuzz_target!(|input: (u16, Vec<Word>, Penalties)| {
let width = input.0;
// The words here have a combined width much longer than an u16.
let words = input.1;
let penalties = input.2.into();

let _ = wrap_optimal_fit(&words, &[width], &penalties);
});
34 changes: 19 additions & 15 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
//! the functionality here is not sufficient or if you have ideas for
//! improving it. We would love to hear from you!
use std::convert::TryInto;

/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence. This is typically used for colored text and will be
/// ignored when computing the text width.
Expand Down Expand Up @@ -62,8 +64,10 @@ pub(crate) fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, char

#[cfg(feature = "unicode-width")]
#[inline]
fn ch_width(ch: char) -> usize {
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
fn ch_width(ch: char) -> u16 {
unicode_width::UnicodeWidthChar::width(ch)
.and_then(|w| w.try_into().ok())
.unwrap_or(0)
}

/// First character which [`ch_width`] will classify as double-width.
Expand All @@ -73,7 +77,7 @@ const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';

#[cfg(not(feature = "unicode-width"))]
#[inline]
fn ch_width(ch: char) -> usize {
fn ch_width(ch: char) -> u16 {
if ch < DOUBLE_WIDTH_CUTOFF {
1
} else {
Expand Down Expand Up @@ -173,7 +177,7 @@ fn ch_width(ch: char) -> usize {
/// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
/// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
/// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
pub fn display_width(text: &str) -> usize {
pub fn display_width(text: &str) -> u16 {
let mut chars = text.chars();
let mut width = 0;
while let Some(ch) = chars.next() {
Expand All @@ -197,15 +201,15 @@ pub fn display_width(text: &str) -> usize {
/// the displayed width of each part, which this trait provides.
pub trait Fragment: std::fmt::Debug {
/// Displayed width of word represented by this fragment.
fn width(&self) -> usize;
fn width(&self) -> u16;

/// Displayed width of the whitespace that must follow the word
/// when the word is not at the end of a line.
fn whitespace_width(&self) -> usize;
fn whitespace_width(&self) -> u16;

/// Displayed width of the penalty that must be inserted if the
/// word falls at the end of a line.
fn penalty_width(&self) -> usize;
fn penalty_width(&self) -> u16;
}

/// A piece of wrappable text, including any trailing whitespace.
Expand All @@ -221,7 +225,7 @@ pub struct Word<'a> {
/// Penalty string to insert if the word falls at the end of a line.
pub penalty: &'a str,
// Cached width in columns.
pub(crate) width: usize,
pub(crate) width: u16,
}

impl std::ops::Deref for Word<'_> {
Expand Down Expand Up @@ -260,7 +264,7 @@ impl<'a> Word<'a> {
/// vec![Word::from("Hel"), Word::from("lo! ")]
/// );
/// ```
pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
pub fn break_apart<'b>(&'b self, line_width: u16) -> impl Iterator<Item = Word<'a>> + 'b {
let mut char_indices = self.word.char_indices();
let mut offset = 0;
let mut width = 0;
Expand Down Expand Up @@ -304,22 +308,22 @@ impl<'a> Word<'a> {

impl Fragment for Word<'_> {
#[inline]
fn width(&self) -> usize {
fn width(&self) -> u16 {
self.width
}

// We assume the whitespace consist of ' ' only. This allows us to
// compute the display width in constant time.
#[inline]
fn whitespace_width(&self) -> usize {
self.whitespace.len()
fn whitespace_width(&self) -> u16 {
self.whitespace.len().try_into().expect("Width exceeds u16")
}

// We assume the penalty is `""` or `"-"`. This allows us to
// compute the display width in constant time.
#[inline]
fn penalty_width(&self) -> usize {
self.penalty.len()
fn penalty_width(&self) -> u16 {
self.penalty.len().try_into().expect("Width exceeds u16")
}
}

Expand All @@ -328,7 +332,7 @@ impl Fragment for Word<'_> {
/// This simply calls [`Word::break_apart`] on words that are too
/// wide. This means that no extra `'-'` is inserted, the word is
/// simply broken into smaller pieces.
pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
pub fn break_words<'a, I>(words: I, line_width: u16) -> Vec<Word<'a>>
where
I: IntoIterator<Item = Word<'a>>,
{
Expand Down
Loading

0 comments on commit 1dfc75a

Please sign in to comment.