Skip to content

Commit

Permalink
Add 'unrolled' is_ascii_align_to benchmark, and move is_ascii benchma…
Browse files Browse the repository at this point in the history
…rks into own file
  • Loading branch information
Thom Chiovoloni committed Jul 5, 2020
1 parent 13e380d commit dc4a644
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 76 deletions.
78 changes: 2 additions & 76 deletions src/libcore/benches/ascii.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
mod is_ascii;

// Lower-case ASCII 'a' is the first byte that has its highest bit set
// after wrap-adding 0x1F:
//
Expand Down Expand Up @@ -59,48 +61,6 @@ macro_rules! benches {
)+
}
};

// For some tests the vec allocation tends to dominate, so it can be avoided.
(@readonly $( fn $name: ident($arg: ident: &[u8]) $body: block )+) => {
benches!(@ro mod short_readonly SHORT $($name $arg $body)+);
benches!(@ro mod medium_readonly MEDIUM $($name $arg $body)+);
benches!(@ro mod long_readonly LONG $($name $arg $body)+);
// Add another `MEDIUM` bench, but trim the ends so that we can (try to)
// benchmark a case where the function has to handle misalignment.
mod medium_unaligned {
use super::*;
$(
#[bench]
fn $name(bencher: &mut Bencher) {
bencher.bytes = MEDIUM.len() as u64 - 2;
let mut vec = MEDIUM.as_bytes().to_vec();
bencher.iter(|| {
black_box(&mut vec);
let $arg = black_box(&vec[1..(vec.len() - 1)]);
black_box($body)
})
}
)+
}
};
(@ro mod $mod_name: ident $input: ident $($name: ident $arg: ident $body: block)+) => {
mod $mod_name {
use super::*;

$(
#[bench]
fn $name(bencher: &mut Bencher) {
bencher.bytes = $input.len() as u64;
let mut vec = $input.as_bytes().to_vec();
bencher.iter(|| {
black_box(&mut vec);
let $arg = black_box(&vec[..]);
black_box($body)
})
}
)+
}
};
}

use test::black_box;
Expand Down Expand Up @@ -287,40 +247,6 @@ benches! {
is_ascii_control,
}

// `is_ascii` benchmark cases that only borrow their input. The `@readonly`
// arm of `benches!` instantiates each case for the SHORT, MEDIUM and LONG
// inputs, plus a MEDIUM slice with one byte trimmed from each end.
benches! {
    @readonly
    // Baseline: the `is_ascii` method on the byte slice.
    fn is_ascii_slice_libcore(bytes: &[u8]) {
        bytes.is_ascii()
    }

    // Checks one byte at a time through the slice iterator.
    fn is_ascii_slice_iter_all(bytes: &[u8]) {
        bytes.iter().all(|b| b.is_ascii())
    }

    // Checks the aligned middle of the slice one `usize` at a time; the
    // implementation lives in `is_ascii_align_to_impl`.
    fn is_ascii_slice_align_to(bytes: &[u8]) {
        is_ascii_align_to_impl(bytes)
    }
}

// Separate since it's easier to debug errors if they don't go through macro
// expansion first.
/// Returns `true` when every byte of `bytes` is ASCII.
///
/// Inputs shorter than one `usize` are scanned byte by byte. Longer inputs are
/// split with `align_to` so the aligned middle can be tested a whole word at a
/// time, while the unaligned edges are still scanned byte by byte.
fn is_ascii_align_to_impl(bytes: &[u8]) -> bool {
    let word_len = core::mem::size_of::<usize>();
    if bytes.len() < word_len {
        bytes.iter().all(|byte| byte.is_ascii())
    } else {
        // SAFETY: every bit pattern is a valid `usize`, so viewing initialized
        // `u8`s as `usize` words is sound.
        let (lead, words, trail) = unsafe { bytes.align_to::<usize>() };
        let lead_ok = lead.iter().all(|byte| byte.is_ascii());
        lead_ok
            && words.iter().all(|&word| !contains_nonascii(word))
            && trail.iter().all(|byte| byte.is_ascii())
    }
}

/// Reports whether any byte of the word `v` has its top bit (`0x80`) set,
/// i.e. whether the word holds at least one non-ASCII byte.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // `0x80` repeated in every byte; the `as` cast truncates the constant on
    // targets where `usize` is narrower than 64 bits.
    const HIGH_BITS: usize = 0x80808080_80808080u64 as usize;
    (v & HIGH_BITS) != 0
}

macro_rules! repeat {
($s: expr) => {
concat!($s, $s, $s, $s, $s, $s, $s, $s, $s, $s)
Expand Down
81 changes: 81 additions & 0 deletions src/libcore/benches/ascii/is_ascii.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use super::{LONG, MEDIUM, SHORT};
use test::black_box;
use test::Bencher;

// Generates the `is_ascii` benchmark matrix: every listed case is instantiated
// once inside each of the input modules created below.
macro_rules! benches {
    // Entry point: a list of `fn name(arg: &[u8]) { .. }` cases.
    ($( fn $case: ident($slice: ident: &[u8]) $code: block )+) => {
        // The complete SHORT / MEDIUM / LONG inputs.
        benches!(mod short SHORT[..] $($case $slice $code)+);
        benches!(mod medium MEDIUM[..] $($case $slice $code)+);
        benches!(mod long LONG[..] $($case $slice $code)+);

        // MEDIUM with one byte trimmed from the front, the back, or both, so
        // the slice handed to the case does not start and/or end where the
        // full buffer does.
        benches!(mod unaligned_head MEDIUM[1..] $($case $slice $code)+);
        benches!(mod unaligned_tail MEDIUM[..(MEDIUM.len() - 1)] $($case $slice $code)+);
        benches!(mod unaligned_both MEDIUM[1..(MEDIUM.len() - 1)] $($case $slice $code)+);
    };

    // Internal arm: emits one module named `$module` holding a `#[bench]`
    // function per case, each run against `$text[$span]`.
    (mod $module: ident $text: ident [$span: expr] $($case: ident $slice: ident $code: block)+) => {
        mod $module {
            use super::*;
            $(
                #[bench]
                fn $case(bencher: &mut Bencher) {
                    // Copy the input into an owned buffer once, outside the
                    // measured closure.
                    let mut buffer = $text.as_bytes().to_vec();
                    // Throughput is reported for the sliced length only.
                    bencher.bytes = $text[$span].len() as u64;
                    bencher.iter(|| {
                        // Pass the buffer and the slice through `black_box`
                        // before handing the slice to the case body.
                        black_box(&mut buffer);
                        let $slice = black_box(&buffer[$span]);
                        black_box($code)
                    })
                }
            )+
        }
    };
}

// The benchmarked `is_ascii` strategies. Each case is expanded by `benches!`
// into the `short`, `medium`, `long` and `unaligned_*` modules.
benches! {
    // Baseline: the `is_ascii` method on the byte slice.
    fn case00_libcore(bytes: &[u8]) {
        bytes.is_ascii()
    }

    // Checks one byte at a time through the slice iterator.
    fn case01_iter_all(bytes: &[u8]) {
        bytes.iter().all(|b| b.is_ascii())
    }

    // Checks the aligned middle of the slice one `usize` at a time.
    fn case02_align_to(bytes: &[u8]) {
        is_ascii_align_to(bytes)
    }

    // Same idea, but the aligned middle is viewed as `[usize; 2]` pairs.
    fn case03_align_to_unrolled(bytes: &[u8]) {
        is_ascii_align_to_unrolled(bytes)
    }
}

// These are separate since it's easier to debug errors if they don't go through
// macro expansion first.
/// Returns `true` when every byte of `bytes` is ASCII, testing the aligned
/// middle of the slice one `usize` word at a time.
fn is_ascii_align_to(bytes: &[u8]) -> bool {
    // Byte-at-a-time fallback, shared by the short-input path and the
    // unaligned edges.
    let bytewise = |chunk: &[u8]| chunk.iter().all(|byte| byte.is_ascii());

    if bytes.len() < core::mem::size_of::<usize>() {
        return bytewise(bytes);
    }

    // SAFETY: every bit pattern is a valid `usize`, so viewing initialized
    // `u8`s as `usize` words is sound.
    let (prefix, words, suffix) = unsafe { bytes.align_to::<usize>() };

    bytewise(prefix) && words.iter().all(|&word| !contains_nonascii(word)) && bytewise(suffix)
}

/// Returns `true` when every byte of `bytes` is ASCII.
///
/// Like the plain `align_to` version, but the aligned middle is viewed as
/// `[usize; 2]` pairs and both words of a pair are OR-ed together so that a
/// single mask test covers two words.
fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {
    let word_len = core::mem::size_of::<usize>();
    if bytes.len() < word_len {
        return bytes.iter().all(|byte| byte.is_ascii());
    }

    // SAFETY: every bit pattern is a valid `[usize; 2]`, so viewing
    // initialized `u8`s as pairs of `usize` words is sound.
    let (prefix, pairs, suffix) = unsafe { bytes.align_to::<[usize; 2]>() };

    if !prefix.iter().all(|byte| byte.is_ascii()) {
        return false;
    }
    // A high bit set in either word survives the OR.
    if !pairs.iter().all(|pair| !contains_nonascii(pair[0] | pair[1])) {
        return false;
    }
    suffix.iter().all(|byte| byte.is_ascii())
}

/// Returns `true` if at least one byte of `v` is outside the ASCII range,
/// i.e. has its most significant bit set.
#[inline]
fn contains_nonascii(v: usize) -> bool {
    // One `0x80` per byte; the `as` cast truncates the constant on targets
    // where `usize` is narrower than 64 bits.
    const TOP_BIT_PER_BYTE: usize = 0x80808080_80808080u64 as usize;
    let flagged = v & TOP_BIT_PER_BYTE;
    flagged != 0
}

0 comments on commit dc4a644

Please sign in to comment.