Skip to content

Commit

Permalink
Handle overflows in wrap_optimal_fit by divide-and-conquer
Browse files Browse the repository at this point in the history
The `wrap_optimal_fit` algorithm computes the penalty for a gap as
`gap * gap`. If a fragment has a size near `usize::max_value()` and if
the line width is small, this computation can easily overflow.

When this happened, we would previously abort or unwind. Now, we
instead do the computations with checked arithmetic and detect the
overflow. We then proceed to wrap the first half of the fragments by
themselves. If this works, we then wrap the second half. This way, we
might be able to wrap everything without overflow.

Should there be a single fragment which causes the overflow by itself,
this fragment is put on a line by itself.

When wrapping part of the fragments, we might of course end up with a
partial last line. To fix this, we simply pop this line and re-wrap
the fragments that were put onto this line. This ensures no “seams” in
the wrapping.

Fixes #247.
  • Loading branch information
mgeisler committed Dec 27, 2020
1 parent e26ef14 commit bba043c
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 20 deletions.
8 changes: 7 additions & 1 deletion fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ edition = "2018"
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.3"
libfuzzer-sys = { version = "0.3", features = ["arbitrary-derive"] }
textwrap = { path = ".." }

# Prevent this from interfering with workspaces
Expand All @@ -28,3 +28,9 @@ name = "fill_first_fit"
path = "fuzz_targets/fill_first_fit.rs"
test = false
doc = false

[[bin]]
name = "wrap_optimal_fit"
path = "fuzz_targets/wrap_optimal_fit.rs"
test = false
doc = false
30 changes: 30 additions & 0 deletions fuzz/fuzz_targets/wrap_optimal_fit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#![no_main]
use libfuzzer_sys::{arbitrary, fuzz_target};
use textwrap::core;
use textwrap::core::Fragment;

/// Fragment generated by the fuzzer. The three fields are reported, in
/// order, as the fragment's width, whitespace width and penalty width.
#[derive(arbitrary::Arbitrary, Clone, Debug, Eq, PartialEq)]
struct BoxGluePenalty(usize, usize, usize);

impl core::Fragment for BoxGluePenalty {
    /// First field: width of the fragment itself.
    fn width(&self) -> usize {
        let BoxGluePenalty(width, _, _) = *self;
        width
    }

    /// Second field: width of the whitespace following the fragment.
    fn whitespace_width(&self) -> usize {
        let BoxGluePenalty(_, whitespace, _) = *self;
        whitespace
    }

    /// Third field: width of the penalty item.
    fn penalty_width(&self) -> usize {
        let BoxGluePenalty(_, _, penalty) = *self;
        penalty
    }
}

// Fuzz `core::wrap_optimal_fit` with arbitrary fragments and a single
// arbitrary line width that is used for every line. The wrapped lines
// are discarded.
fuzz_target!(|input: (Vec<BoxGluePenalty>, u64)| {
    // The fragments are only read, so borrow them instead of cloning
    // the whole vector on every fuzz iteration.
    let fragments = &input.0;
    // `as` truncates the value on targets where `usize` is narrower
    // than 64 bits.
    let line_width = input.1 as usize;

    // Skip inputs whose summed widths do not fit in a `usize`.
    // `try_fold` stops at the first addition that overflows.
    let total_width: Option<usize> = fragments.iter().try_fold(0usize, |sum, f| {
        sum.checked_add(f.width())?
            .checked_add(f.whitespace_width())?
            .checked_add(f.penalty_width())
    });
    if total_width.is_none() {
        return;
    }

    let _ = core::wrap_optimal_fit(fragments, &|_| line_width);
});
178 changes: 160 additions & 18 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
//! improving it. We would love to hear from you!
use crate::{Options, WordSplitter};
use std::cell::RefCell;
use std::cell::{Cell, RefCell};
use unicode_width::UnicodeWidthChar;

/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
Expand Down Expand Up @@ -538,7 +538,7 @@ impl LineNumbers {
}
}

fn get(&self, i: usize, minima: &[(usize, i32)]) -> usize {
fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
while self.line_numbers.borrow_mut().len() < i + 1 {
let pos = self.line_numbers.borrow().len();
let line_number = 1 + self.get(minima[pos].0, &minima);
Expand All @@ -551,7 +551,7 @@ impl LineNumbers {

/// Per-line penalty. This is added for every line, which makes it
/// expensive to output more lines than the minimum required.
const NLINE_PENALTY: i32 = 1000;
const NLINE_PENALTY: usize = 1000;

/// Per-character cost for lines that overflow the target line width.
///
Expand Down Expand Up @@ -590,16 +590,16 @@ const NLINE_PENALTY: i32 = 1000;
/// _and_ if it happens to overflow the line by exactly one character.
/// If it overflows by more than one character, the overflow penalty
/// will quickly outgrow the cost of the gap, as seen above.
const OVERFLOW_PENALTY: i32 = 50 * 50;
const OVERFLOW_PENALTY: usize = 50 * 50;

/// The last line is short if it is less than 1/4 of the target width.
const SHORT_LINE_FRACTION: usize = 4;

/// Penalize a short last line.
const SHORT_LAST_LINE_PENALTY: i32 = 25;
const SHORT_LAST_LINE_PENALTY: usize = 25;

/// Penalty for lines ending with a hyphen.
const HYPHEN_PENALTY: i32 = 25;
const HYPHEN_PENALTY: usize = 25;

/// Wrap abstract fragments into lines with an optimal-fit algorithm.
///
Expand Down Expand Up @@ -675,6 +675,60 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
fragments: &'a [T],
line_widths: F,
) -> Vec<&'a [T]> {
let mut min_idx = 0;
let mut max_idx = fragments.len();

let mut result = Vec::new();

// We call wrap_optimal_fit_checked on smaller and smaller slices
// until we either end up with a single fragment or we find a
// slice which can be wrapped without overflow. In either case, we
// advance min_idx which ensures that we make progress.
loop {
match wrap_optimal_fit_checked(&fragments[min_idx..max_idx], &line_widths) {
Some(lines) => {
let partial_last_line = lines.len() > 1;
result.extend(lines);
if max_idx == fragments.len() {
return result; // All done!
}

min_idx = max_idx;
max_idx = fragments.len();

// We assume that the last wrapped line is incomplete
// and needs to be re-wrapped.
if partial_last_line {
let last_line = result.pop().unwrap();
min_idx -= last_line.len();
}
}
None => {
if max_idx - min_idx == 1 {
// This single fragment is causing an overflow, so
// we put on its own line.
result.push(&fragments[min_idx..max_idx]);
if max_idx == fragments.len() {
return result; // All done!
}

min_idx = max_idx;
max_idx = fragments.len();
} else {
max_idx = min_idx + (max_idx - min_idx) / 2;
}
}
}
}
}

/// Wrap abstract fragments into lines with an optimal-fit algorithm.
/// Returns `None` if an overflow occurs during the penalty
/// computations. See [`wrap_optimal_fit`].
fn wrap_optimal_fit_checked<'a, T: Fragment, F: Fn(usize) -> usize>(
fragments: &'a [T],
line_widths: F,
) -> Option<Vec<&'a [T]>> {
let mut widths = Vec::with_capacity(fragments.len() + 1);
let mut width = 0;
widths.push(width);
Expand All @@ -683,53 +737,76 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
widths.push(width);
}

if widths.last() < Some(&line_widths(0)) {
return Some(vec![fragments]);
}

let line_numbers = LineNumbers::new(fragments.len());
let detected_overflow = Cell::new(false);

let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
let cost_fn = |minima: &[(usize, usize)], i, j| -> Option<usize> {
// Line number for fragment `i`.
let line_number = line_numbers.get(i, &minima);
let target_width = std::cmp::max(1, line_widths(line_number));

// Compute the width of a line spanning fragments[i..j] in
// constant time. We need to adjust widths[j] by subtracting
// the whitespace of fragment[j-i] and then add the penalty.
let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
+ fragments[j - 1].penalty_width();
let last_fragment: &T = &fragments[j - 1];
let line_width = widths[j] - widths[i] - last_fragment.whitespace_width()
+ last_fragment.penalty_width();

// We compute cost of the line containing fragments[i..j]. We
// start with values[i].1, which is the optimal cost for
// breaking before fragments[i].
//
// First, every extra line cost NLINE_PENALTY.
let mut cost = minima[i].1 + NLINE_PENALTY;
let mut cost = minima[i].1.checked_add(NLINE_PENALTY)?;

// Next, we add a penalty depending on the line length.
if line_width > target_width {
// Lines that overflow get a hefty penalty.
let overflow = (line_width - target_width) as i32;
cost += overflow * OVERFLOW_PENALTY;
let overflow: usize = line_width - target_width;
cost = cost.checked_add(overflow.checked_mul(OVERFLOW_PENALTY)?)?;
} else if j < fragments.len() {
// Other lines (except for the last line) get a milder
// penalty which depend on the size of the gap.
let gap = (target_width - line_width) as i32;
cost += gap * gap;
let gap: usize = target_width - line_width;
cost = cost.checked_add(gap.checked_mul(gap)?)?;
} else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION {
// The last line can have any size gap, but we do add a
// penalty if the line is very short (typically because it
// contains just a single word).
cost += SHORT_LAST_LINE_PENALTY;
cost = cost.checked_add(SHORT_LAST_LINE_PENALTY)?;
}

// Finally, we discourage hyphens.
if fragments[j - 1].penalty_width() > 0 {
// TODO: this should use a penalty value from the fragment
// instead.
cost += HYPHEN_PENALTY;
cost = cost.checked_add(HYPHEN_PENALTY)?;
}

cost
Some(cost)
};

let minima = smawk::online_column_minima(0, widths.len(), |minima: &[(usize, usize)], i, j| {
if detected_overflow.get() {
return 0;
}
match cost_fn(minima, i, j) {
Some(cost) => cost,
None => {
detected_overflow.set(true);
0
}
}
});

if detected_overflow.into_inner() {
return None;
}

let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
let mut pos = fragments.len();
loop {
Expand All @@ -742,7 +819,7 @@ pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
}

lines.reverse();
lines
Some(lines)
}

#[cfg(test)]
Expand Down Expand Up @@ -927,4 +1004,69 @@ mod tests {
]
);
}

/// Test fragment whose only field is its width. It always reports a
/// whitespace width of 1 and a penalty width of 0.
#[derive(Debug, PartialEq, Eq)]
struct BoxGluePenalty(usize);

impl Fragment for BoxGluePenalty {
    fn width(&self) -> usize {
        let BoxGluePenalty(width) = *self;
        width
    }

    fn whitespace_width(&self) -> usize {
        1
    }

    fn penalty_width(&self) -> usize {
        0
    }
}

#[test]
fn optimal_fit_single_fragment_overflow() {
    let line_widths = |_| 80;
    let oversized = vec![BoxGluePenalty(2 << 60)];

    // The checked variant reports the overflow ...
    let checked = wrap_optimal_fit_checked(&oversized, &line_widths);
    assert_eq!(checked, None);

    // ... while the public function puts the fragment on its own line.
    let wrapped = wrap_optimal_fit(&oversized, &line_widths);
    assert_eq!(wrapped, vec![[BoxGluePenalty(2 << 60)]]);
}

#[test]
fn optimal_fit_rewrapping_on_overflow() {
    /// Turn a list of widths into a list of fragments.
    fn boxes(widths: &[usize]) -> Vec<BoxGluePenalty> {
        widths.iter().map(|&width| BoxGluePenalty(width)).collect()
    }

    // Index 4 is a small fragment, index 5 is the over-sized one.
    let fragments = boxes(&[1001, 1002, 1003, 1004, 105, 2 << 60, 1007, 1008]);
    let line_widths = |_| 2500; // Room for two big fragments.

    // Wrapping everything in one go overflows.
    assert_eq!(wrap_optimal_fit_checked(&fragments, &line_widths), None);

    // First five fragments fit on two lines and the small 105
    // fragment is included on the second line:
    let first_five = wrap_optimal_fit_checked(&fragments[..5], &line_widths).unwrap();
    assert_eq!(
        first_five,
        vec![boxes(&[1001, 1002]), boxes(&[1003, 1004, 105])]
    );

    // The full input keeps those two lines, isolates the over-sized
    // fragment and wraps the remaining fragments after it.
    let all_lines = wrap_optimal_fit(&fragments, &line_widths);
    assert_eq!(
        all_lines,
        vec![
            boxes(&[1001, 1002]),
            boxes(&[1003, 1004, 105]),
            boxes(&[2 << 60]),
            boxes(&[1007, 1008]),
        ]
    );
}
}
7 changes: 6 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -850,10 +850,15 @@ mod tests {
}

#[test]
fn max_width() {
fn max_width_usize() {
assert_eq!(wrap("foo bar", usize::max_value()), vec!["foo bar"]);
}

// Regression test named after issue 247: wrapping a short text with a
// very large width must return the text as a single line.
#[test]
fn max_width_usize_issue_247() {
assert_eq!(wrap("x y", 515566821223), vec!["x y"]);
}

#[test]
fn leading_whitespace() {
assert_eq!(wrap(" foo bar", 6), vec![" foo", "bar"]);
Expand Down

0 comments on commit bba043c

Please sign in to comment.