Skip to content

Commit

Permalink
Auto merge of rust-lang#91527 - the8472:retain-opt, r=dtolnay
Browse files Browse the repository at this point in the history
Optimize `vec::retain` performance

This simply moves the loops into the inner function, which leads to better benchmark results.

```
old:

test vec::bench_retain_100000                            ... bench:     203,828 ns/iter (+/- 2,101)
test vec::bench_retain_iter_100000                       ... bench:      63,324 ns/iter (+/- 12,305)
test vec::bench_retain_whole_100000                      ... bench:      42,989 ns/iter (+/- 291)

new:

test vec::bench_retain_100000                            ... bench:      42,180 ns/iter (+/- 451)
test vec::bench_retain_iter_100000                       ... bench:      65,167 ns/iter (+/- 11,971)
test vec::bench_retain_whole_100000                      ... bench:      33,736 ns/iter (+/- 12,404)
```

Measured on x86_64-unknown-linux-gnu, Zen2

Fixes rust-lang#91497
  • Loading branch information
bors committed Dec 16, 2021
2 parents 9e1aff8 + 67180ef commit a090c86
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 34 deletions.
19 changes: 17 additions & 2 deletions library/alloc/benches/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -733,11 +733,26 @@ fn bench_flat_map_collect(b: &mut Bencher) {
b.iter(|| v.iter().flat_map(|color| color.rotate_left(8).to_be_bytes()).collect::<Vec<_>>());
}

/// Reference benchmark that `retain` has to compete with.
///
/// Performs the same filtering as the `retain` benchmark (keep the even values of
/// `1..=100000`), but expresses it as `into_iter().filter(..).collect()`.
#[bench]
fn bench_retain_iter_100000(b: &mut Bencher) {
    let mut v = Vec::with_capacity(100000);

    b.iter(|| {
        // Move the vector left over from the previous iteration out of `v`,
        // then empty and refill it with fresh input.
        let mut scratch = std::mem::take(&mut v);
        scratch.clear();
        scratch.extend(black_box(1..=100000));

        // Keep only the even values and store the result back for the next iteration.
        let evens = scratch.into_iter().filter(|x| x & 1 == 0);
        v = evens.collect();
    });
}

/// Benchmarks `Vec::retain` with a predicate that keeps the even values of `1..=100000`.
// NOTE(review): this listing looks like a rendered diff in which removed and added lines
// are interleaved without +/- markers. The `collect::<Vec<u32>>()` binding and the
// `v.clone()` line are presumably the pre-change version, while the `with_capacity`,
// `clear` and `extend` lines are presumably the post-change version. Confirm against the
// actual file before relying on this body as written.
#[bench]
fn bench_retain_100000(b: &mut Bencher) {
let v = (1..=100000).collect::<Vec<u32>>();
let mut v = Vec::with_capacity(100000);

b.iter(|| {
let mut v = v.clone();
v.clear();
// `black_box` hides the input range from the optimizer.
v.extend(black_box(1..=100000));
// Keep only the even values.
v.retain(|x| x & 1 == 0)
});
}
Expand Down
61 changes: 29 additions & 32 deletions library/alloc/src/vec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1520,49 +1520,46 @@ impl<T, A: Allocator> Vec<T, A> {

let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };

// NOTE(review): this listing looks like a rendered diff in which the removed helper
// (`process_one`, which handles a single element and returns a bool) and the added helper
// (`process_loop`, which contains the `while` loop itself) are interleaved without +/-
// markers. It is not valid Rust as shown; confirm against the actual file.
//
// `process_one` returns a bool indicating whether the processed element should be retained.
#[inline(always)]
fn process_one<F, T, A: Allocator, const DELETED: bool>(
// `process_loop` advances `g.processed_len` until it reaches `original_len`, calling `f`
// on each element. `DELETED` selects whether retained elements are copied back into the
// hole left by previously removed elements.
fn process_loop<F, T, A: Allocator, const DELETED: bool>(
original_len: usize,
f: &mut F,
g: &mut BackshiftOnDrop<'_, T, A>,
) -> bool
where
) where
F: FnMut(&mut T) -> bool,
{
// SAFETY: Unchecked element must be valid.
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
if !f(cur) {
// Advance early to avoid double drop if `drop_in_place` panicked.
g.processed_len += 1;
g.deleted_cnt += 1;
// SAFETY: We never touch this element again after it is dropped.
unsafe { ptr::drop_in_place(cur) };
// We already advanced the counter.
return false;
}
if DELETED {
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
// We use copy for move, and never touch this element again.
unsafe {
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
ptr::copy_nonoverlapping(cur, hole_slot, 1);
while g.processed_len != original_len {
// SAFETY: Unchecked element must be valid.
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
if !f(cur) {
// Advance early to avoid double drop if `drop_in_place` panicked.
g.processed_len += 1;
g.deleted_cnt += 1;
// SAFETY: We never touch this element again after it is dropped.
unsafe { ptr::drop_in_place(cur) };
// We already advanced the counter.
// With `DELETED == false`, the first rejected element ends this loop (presumably
// so the caller can switch to the `DELETED == true` instantiation); with
// `DELETED == true`, scanning simply continues with the next element.
if DELETED {
continue;
} else {
break;
}
}
if DELETED {
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
// We use copy for move, and never touch this element again.
unsafe {
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
ptr::copy_nonoverlapping(cur, hole_slot, 1);
}
}
// The element was retained; move on to the next one.
g.processed_len += 1;
}
g.processed_len += 1;
return true;
}

// Stage 1: Nothing was deleted.
while g.processed_len != original_len {
if !process_one::<F, T, A, false>(&mut f, &mut g) {
break;
}
}
process_loop::<F, T, A, false>(original_len, &mut f, &mut g);

// Stage 2: Some elements were deleted.
while g.processed_len != original_len {
process_one::<F, T, A, true>(&mut f, &mut g);
}
process_loop::<F, T, A, true>(original_len, &mut f, &mut g);

// All items are processed. This can be optimized to `set_len` by LLVM.
drop(g);
Expand Down

0 comments on commit a090c86

Please sign in to comment.