Skip to content

Commit

Permalink
Auto merge of #83458 - saethlin:improve-vec-benches, r=dtolnay
Browse files Browse the repository at this point in the history
Clean up Vec's benchmarks

The Vec benchmarks need a lot of love. I sort of noticed this in #83357 but the overall situation is much less awesome than I thought at the time. The first commit just removes a lot of asserts and does a touch of other cleanup.

A number of these benchmarks are poorly-named. For example, `bench_map_fast` is not in fact fast, `bench_rev_1` and `bench_rev_2` are vague, `bench_in_place_zip_iter_mut` doesn't call `zip`, `bench_in_place*` don't do anything in-place... Should I fix these, or is there tooling that depend on the names not changing?

I've also noticed that `bench_rev_1` and `bench_rev_2` are remarkably fragile. It looks like poking other code in `Vec` can cause the codegen of this benchmark to switch to a version that has almost exactly half its current throughput and I have absolutely no idea why.

Here's the fast version:
```asm
  0.69 │110:   movdqu -0x20(%rbx,%rdx,4),%xmm0
  1.76 │       movdqu -0x10(%rbx,%rdx,4),%xmm1
  0.71 │       pshufd $0x1b,%xmm1,%xmm1
  0.60 │       pshufd $0x1b,%xmm0,%xmm0
  3.68 │       movdqu %xmm1,-0x30(%rcx)
 14.36 │       movdqu %xmm0,-0x20(%rcx)
 13.88 │       movdqu -0x40(%rbx,%rdx,4),%xmm0
  6.64 │       movdqu -0x30(%rbx,%rdx,4),%xmm1
  0.76 │       pshufd $0x1b,%xmm1,%xmm1
  0.77 │       pshufd $0x1b,%xmm0,%xmm0
  1.87 │       movdqu %xmm1,-0x10(%rcx)
 13.01 │       movdqu %xmm0,(%rcx)
 38.81 │       add    $0x40,%rcx
  0.92 │       add    $0xfffffffffffffff0,%rdx
  1.22 │     ↑ jne    110
```
And the slow one:
```asm
  0.42 │9a880:   movdqa     %xmm2,%xmm1
  4.03 │9a884:   movq       -0x8(%rbx,%rsi,4),%xmm4
  8.49 │9a88a:   pshufd     $0xe1,%xmm4,%xmm4
  2.58 │9a88f:   movq       -0x10(%rbx,%rsi,4),%xmm5
  7.02 │9a895:   pshufd     $0xe1,%xmm5,%xmm5
  4.79 │9a89a:   punpcklqdq %xmm5,%xmm4
  5.77 │9a89e:   movdqu     %xmm4,-0x18(%rdx)
 15.74 │9a8a3:   movq       -0x18(%rbx,%rsi,4),%xmm4
  3.91 │9a8a9:   pshufd     $0xe1,%xmm4,%xmm4
  5.04 │9a8ae:   movq       -0x20(%rbx,%rsi,4),%xmm5
  5.29 │9a8b4:   pshufd     $0xe1,%xmm5,%xmm5
  4.60 │9a8b9:   punpcklqdq %xmm5,%xmm4
  9.81 │9a8bd:   movdqu     %xmm4,-0x8(%rdx)
 11.05 │9a8c2:   paddq      %xmm3,%xmm0
  0.86 │9a8c6:   paddq      %xmm3,%xmm2
  5.89 │9a8ca:   add        $0x20,%rdx
  0.12 │9a8ce:   add        $0xfffffffffffffff8,%rsi
  1.16 │9a8d2:   add        $0x2,%rdi
  2.96 │9a8d6: → jne        9a880 <<alloc::vec::Vec<T,A> as core::iter::traits::collect::Extend<&T>>::extend+0xd0>
```
  • Loading branch information
bors committed Mar 30, 2021
2 parents a0e229a + 8c88418 commit 689e847
Showing 1 changed file with 25 additions and 68 deletions.
93 changes: 25 additions & 68 deletions library/alloc/benches/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,13 @@ use test::{black_box, Bencher};

#[bench]
fn bench_new(b: &mut Bencher) {
b.iter(|| {
let v: Vec<u32> = Vec::new();
assert_eq!(v.len(), 0);
assert_eq!(v.capacity(), 0);
v
})
b.iter(|| Vec::<u32>::new())
}

fn do_bench_with_capacity(b: &mut Bencher, src_len: usize) {
b.bytes = src_len as u64;

b.iter(|| {
let v: Vec<u32> = Vec::with_capacity(src_len);
assert_eq!(v.len(), 0);
assert_eq!(v.capacity(), src_len);
v
})
b.iter(|| Vec::<u32>::with_capacity(src_len))
}

#[bench]
Expand All @@ -46,12 +36,7 @@ fn bench_with_capacity_1000(b: &mut Bencher) {
fn do_bench_from_fn(b: &mut Bencher, src_len: usize) {
b.bytes = src_len as u64;

b.iter(|| {
let dst = (0..src_len).collect::<Vec<_>>();
assert_eq!(dst.len(), src_len);
assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
dst
})
b.iter(|| (0..src_len).collect::<Vec<_>>())
}

#[bench]
Expand All @@ -77,12 +62,7 @@ fn bench_from_fn_1000(b: &mut Bencher) {
fn do_bench_from_elem(b: &mut Bencher, src_len: usize) {
b.bytes = src_len as u64;

b.iter(|| {
let dst: Vec<usize> = repeat(5).take(src_len).collect();
assert_eq!(dst.len(), src_len);
assert!(dst.iter().all(|x| *x == 5));
dst
})
b.iter(|| repeat(5).take(src_len).collect::<Vec<usize>>())
}

#[bench]
Expand Down Expand Up @@ -110,12 +90,7 @@ fn do_bench_from_slice(b: &mut Bencher, src_len: usize) {

b.bytes = src_len as u64;

b.iter(|| {
let dst = src.clone()[..].to_vec();
assert_eq!(dst.len(), src_len);
assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
dst
});
b.iter(|| src.as_slice().to_vec());
}

#[bench]
Expand Down Expand Up @@ -144,9 +119,7 @@ fn do_bench_from_iter(b: &mut Bencher, src_len: usize) {
b.bytes = src_len as u64;

b.iter(|| {
let dst: Vec<_> = FromIterator::from_iter(src.clone());
assert_eq!(dst.len(), src_len);
assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
let dst: Vec<_> = FromIterator::from_iter(src.iter().cloned());
dst
});
}
Expand Down Expand Up @@ -180,8 +153,6 @@ fn do_bench_extend(b: &mut Bencher, dst_len: usize, src_len: usize) {
b.iter(|| {
let mut dst = dst.clone();
dst.extend(src.clone());
assert_eq!(dst.len(), dst_len + src_len);
assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
dst
});
}
Expand Down Expand Up @@ -230,8 +201,6 @@ fn do_bench_extend_from_slice(b: &mut Bencher, dst_len: usize, src_len: usize) {
b.iter(|| {
let mut dst = dst.clone();
dst.extend_from_slice(&src);
assert_eq!(dst.len(), dst_len + src_len);
assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
dst
});
}
Expand Down Expand Up @@ -290,12 +259,7 @@ fn do_bench_clone(b: &mut Bencher, src_len: usize) {

b.bytes = src_len as u64;

b.iter(|| {
let dst = src.clone();
assert_eq!(dst.len(), src_len);
assert!(dst.iter().enumerate().all(|(i, x)| i == *x));
dst
});
b.iter(|| src.clone());
}

#[bench]
Expand Down Expand Up @@ -329,8 +293,7 @@ fn do_bench_clone_from(b: &mut Bencher, times: usize, dst_len: usize, src_len: u

for _ in 0..times {
dst.clone_from(&src);
assert_eq!(dst.len(), src_len);
assert!(dst.iter().enumerate().all(|(i, x)| dst_len + i == *x));
dst = black_box(dst);
}
dst
});
Expand Down Expand Up @@ -463,11 +426,10 @@ macro_rules! bench_in_place {
fn $fname(b: &mut Bencher) {
b.iter(|| {
let src: Vec<$type> = black_box(vec![$init; $count]);
let mut sink = src.into_iter()
src.into_iter()
.enumerate()
.map(|(idx, e)| idx as $type ^ e)
.collect::<Vec<$type>>();
black_box(sink.as_mut_ptr())
.collect::<Vec<$type>>()
});
}
)+
Expand Down Expand Up @@ -527,7 +489,6 @@ fn bench_in_place_zip_recycle(b: &mut Bencher) {
.enumerate()
.map(|(i, (d, s))| d.wrapping_add(i as u8) ^ s)
.collect::<Vec<_>>();
assert_eq!(mangled.len(), 1000);
data = black_box(mangled);
});
}
Expand Down Expand Up @@ -614,23 +575,6 @@ fn bench_nest_chain_chain_collect(b: &mut Bencher) {
});
}

pub fn example_plain_slow(l: &[u32]) -> Vec<u32> {
let mut result = Vec::with_capacity(l.len());
result.extend(l.iter().rev());
result
}

pub fn map_fast(l: &[(u32, u32)]) -> Vec<u32> {
let mut result = Vec::with_capacity(l.len());
for i in 0..l.len() {
unsafe {
*result.get_unchecked_mut(i) = l[i].0;
result.set_len(i);
}
}
result
}

#[bench]
fn bench_range_map_collect(b: &mut Bencher) {
b.iter(|| (0..LEN).map(|_| u32::default()).collect::<Vec<_>>());
Expand Down Expand Up @@ -669,7 +613,11 @@ fn bench_rev_1(b: &mut Bencher) {
#[bench]
fn bench_rev_2(b: &mut Bencher) {
let data = black_box([0; LEN]);
b.iter(|| example_plain_slow(&data));
b.iter(|| {
let mut v = Vec::<u32>::with_capacity(data.len());
v.extend(data.iter().rev());
v
});
}

#[bench]
Expand All @@ -685,7 +633,16 @@ fn bench_map_regular(b: &mut Bencher) {
#[bench]
fn bench_map_fast(b: &mut Bencher) {
let data = black_box([(0, 0); LEN]);
b.iter(|| map_fast(&data));
b.iter(|| {
let mut result = Vec::with_capacity(data.len());
for i in 0..data.len() {
unsafe {
*result.get_unchecked_mut(i) = data[i].0;
result.set_len(i);
}
}
result
});
}

fn random_sorted_fill(mut seed: u32, buf: &mut [u32]) {
Expand Down

0 comments on commit 689e847

Please sign in to comment.