Skip to content

Commit

Permalink
Include even faster code from another reddit user
Browse files Browse the repository at this point in the history
  • Loading branch information
tommyip committed Jul 14, 2023
1 parent f5a70b8 commit 10f48e3
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 2 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Function | Time | Throughput | Relative speed
`opt5_simd_unrolled_10x` | 25.896 µs | 35.964 GiB/s | 128.6 🎉
`opt5_simd_unrolled_12x` | 27.697 µs | 33.626 GiB/s | 120.3
`opt5_simd_unrolled_16x` | 26.954 µs | 34.553 GiB/s | 123.6
`opt6_chunk_count`[^1] | 12.517 µs | 74.403 GiB/s | 266.2 🚀
`opt6_chunk_count`[^1] | 12.517 µs | 74.403 GiB/s | 266.2
`opt6_chunk_exact_count`[^2] | 11.451 µs | 81.332 GiB/s | 290.9 🚀

[^1]: Code suggested by Reddit user [u/DavidM603](https://www.reddit.com/r/rust/comments/14yvlc9/comment/jrwkag7).
[^1]: Credit to Reddit user [u/DavidM603](https://www.reddit.com/r/rust/comments/14yvlc9/comment/jrwkag7).
[^2]: Credit to Reddit user [u/Sharlinator](https://www.reddit.com/r/rust/comments/14yvlc9/comment/jrwt29t).
1 change: 1 addition & 0 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ fn benchmark(c: &mut Criterion) {
bench!(opt5_simd_unrolled_12x);
bench!(opt5_simd_unrolled_16x);
bench!(opt6_chunk_count);
bench!(opt6_chunk_exact_count);
}

criterion_group!(benches, benchmark);
Expand Down
14 changes: 14 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,19 @@ pub fn opt6_chunk_count(input: &str) -> i64 {
(2 * n_s) - input.len() as i64
}

/// Returns the number of input bytes whose lowest bit is set minus the number
/// of bytes whose lowest bit is clear.
///
/// The bytes are processed in fixed-size chunks, each summed into a narrow
/// `u8` subtotal that is then widened to `i64`; the tail that does not fill a
/// whole chunk is summed the same way.
///
/// Credit to u/Sharlinator
/// https://www.reddit.com/r/rust/comments/14yvlc9/comment/jrwt29t
pub fn opt6_chunk_exact_count(input: &str) -> i64 {
    // Every byte adds at most 1 to a chunk's `u8` subtotal, so a chunk may hold
    // at most `u8::MAX` bytes. With 256-byte chunks, a chunk made up entirely of
    // low-bit-set bytes sums to 256 and overflows the `u8` (panic in debug
    // builds, silent wrap to 0 in release builds).
    const CHUNK_LEN: usize = u8::MAX as usize;

    /// Sums the lowest bit of every byte; callers pass at most `CHUNK_LEN` bytes.
    fn low_bit_total(bytes: &[u8]) -> i64 {
        i64::from(bytes.iter().map(|&b| b & 1).sum::<u8>())
    }

    let chunks = input.as_bytes().chunks_exact(CHUNK_LEN);
    // The remainder is always shorter than `CHUNK_LEN`, so it fits in a `u8` too.
    let rest = chunks.remainder();
    let n_s = chunks.map(low_bit_total).sum::<i64>() + low_bit_total(rest);
    (2 * n_s) - input.len() as i64
}

pub fn gen_random_input(size: usize) -> String {
let mut input = String::with_capacity(size);
let dist = Bernoulli::new(0.5).unwrap();
Expand Down Expand Up @@ -157,6 +170,7 @@ mod tests {
assert_eq!($expected, opt5_simd_unrolled_12x($input));
assert_eq!($expected, opt5_simd_unrolled_16x($input));
assert_eq!($expected, opt6_chunk_count($input));
assert_eq!($expected, opt6_chunk_exact_count($input));
};
}

Expand Down

0 comments on commit 10f48e3

Please sign in to comment.