Skip to content

Commit

Permalink
simd shuffle
Browse files Browse the repository at this point in the history
  • Loading branch information
TheIronBorn committed Jun 20, 2018
1 parent a418ea3 commit 849e9b2
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 12 deletions.
6 changes: 3 additions & 3 deletions rand_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ pub mod simd_impls;
/// to avoid platform differences, and avoid making any changes which affect
/// output (except by communicating that the release has breaking changes).
///
/// Typically implementators will implement only one of the methods available
/// Typically implementors will implement only one of the methods available
/// in this trait directly, then use the helper functions from the
/// [`rand_core::impls`] module to implement the other methods.
///
Expand Down Expand Up @@ -209,7 +209,7 @@ pub trait RngCore {
/// Some generators may satisfy an additional property, however this is not
/// required by this trait: if the CSPRNG's state is revealed, it should not be
/// computationally-feasible to reconstruct output prior to this. Some other
/// generators allow backwards-computation and are consided *reversible*.
/// generators allow backwards-computation and are considered *reversible*.
///
/// Note that this trait is provided for guidance only and cannot guarantee
/// suitability for cryptographic applications. In general it should only be
Expand Down Expand Up @@ -293,7 +293,7 @@ pub trait SeedableRng: Sized {
///
/// It is however not required that this function yield the same state as a
/// reference implementation of the PRNG given equivalent seed; if necessary
/// another constructor replicating behaviour from a reference
/// another constructor replicating behavior from a reference
/// implementation can be added.
///
/// PRNG implementations should make sure `from_seed` never panics. In the
Expand Down
19 changes: 17 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,11 @@ extern crate rand_core;
#[cfg(feature="simd_support")]
extern crate stdsimd;

#[cfg(feature="simd_support")]
mod simd_shuffle;
#[cfg(feature="simd_support")]
pub use simd_shuffle::SimdShuf;

// Re-exports from rand_core
pub use rand_core::{RngCore, CryptoRng, SeedableRng};
pub use rand_core::{ErrorKind, Error};
Expand Down Expand Up @@ -318,7 +323,7 @@ pub mod isaac {

#[cfg(feature="simd_support")]
use stdsimd::simd::*;
use core::{marker, mem, slice};
use core::{marker, mem, slice, ptr};
use distributions::{Distribution, Standard};
use distributions::uniform::{SampleUniform, UniformSampler};

Expand Down Expand Up @@ -622,7 +627,8 @@ pub trait Rng: RngCore {
// invariant: elements with index >= i have been locked in place.
i -= 1;
// lock element i in place.
values.swap(i, self.gen_range(0, i + 1));
let r = self.gen_range(0, i + 1);
unsafe { swap_unchecked(values, i, r); }
}
}

Expand Down Expand Up @@ -688,6 +694,15 @@ pub trait Rng: RngCore {
}
}

/// Swaps the elements at positions `a` and `b` of `values` without performing
/// any bounds checks.
///
/// Intended for callers that already know both indices are valid (for
/// example because they were produced by `gen_range` with a bound derived
/// from the slice length) but where the compiler cannot prove it and would
/// otherwise keep the checks.
///
/// # Safety
///
/// Both `a` and `b` must be strictly less than `values.len()`. Passing an
/// out-of-range index is undefined behavior. `a == b` is allowed.
#[inline]
unsafe fn swap_unchecked<T>(values: &mut [T], a: usize, b: usize) {
    // Derive both element pointers from a single base pointer.
    let base: *mut T = values.as_mut_ptr();
    // `ptr::swap` tolerates the two locations being the same element.
    ptr::swap(base.add(a), base.add(b));
}

impl<R: RngCore + ?Sized> Rng for R {}

/// Trait for casting types to byte slices
Expand Down
9 changes: 6 additions & 3 deletions src/prng/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
//!
//! In simple terms, the basic PRNGs are often predictable; CSPRNGs should not
//! be predictable *when used correctly*.
//!
//!
//! Contents of this documentation:
//!
//! 1. [The generators](#the-generators)
Expand Down Expand Up @@ -136,7 +136,7 @@
//! 256 bits would be approximately the minimum secure size. In practice,
//! CSPRNGs tend to use quite a bit more, [`ChaChaRng`] is relatively small with
//! 136 bytes of state.
//!
//!
//! ## Initialization time
//!
//! The time required to initialize new generators varies significantly. Many
Expand Down Expand Up @@ -320,6 +320,8 @@ pub mod hc128;
pub mod isaac;
pub mod isaac64;
mod sfc32;
#[cfg(feature = "simd_support")]
mod sfc_alt;
mod xorshift;

mod isaac_array;
Expand All @@ -328,6 +330,7 @@ pub use self::chacha::ChaChaRng;
pub use self::hc128::Hc128Rng;
pub use self::isaac::IsaacRng;
pub use self::isaac64::Isaac64Rng;
pub use self::sfc32::Sfc32Rng;
pub use self::sfc32::*;
#[cfg(feature = "simd_support")]
pub use self::sfc_alt::*;
pub use self::xorshift::XorShiftRng;
4 changes: 2 additions & 2 deletions src/rngs/jitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -598,11 +598,11 @@ impl JitterRng {
self.stir_pool();
self.data
}

/// Basic quality tests on the timer, by measuring CPU timing jitter a few
/// hundred times.
///
/// If succesful, this will return the estimated number of rounds necessary
/// If successful, this will return the estimated number of rounds necessary
/// to collect 64 bits of entropy. Otherwise a [`TimerError`] with the cause
/// of the failure will be returned.
///
Expand Down
4 changes: 2 additions & 2 deletions src/rngs/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ use rand_core::{CryptoRng, RngCore, Error, impls};
/// # Panics
///
/// `OsRng` is extremely unlikely to fail if `OsRng::new()`, and one read from
/// it, where succesfull. But in case it does fail, only [`try_fill_bytes`] is
/// it, were successful. But in case it does fail, only [`try_fill_bytes`] is
/// able to report the cause. Depending on the error the other [`RngCore`]
/// methods will retry several times, and panic in case the error remains.
///
Expand Down Expand Up @@ -963,7 +963,7 @@ mod imp {
#[cfg(windows)]
mod imp {
extern crate winapi;

use {Error, ErrorKind};
use super::OsRngImpl;

Expand Down
120 changes: 120 additions & 0 deletions src/simd_shuffle.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
//! An SIMD shuffle implementation.
//!
//! Loosely based on Daniel Lemire's [SIMDxorshift].
//!
//! [SIMDxorshift]: https://github.com/lemire/SIMDxorshift
// use core::mem::size_of;
use stdsimd::simd::*;

// use distributions::range::SampleSingleHigh;
use distributions::uniform::WideningMultiply;
use {swap_unchecked, Rng};

/// A trait for shuffling slices.
///
/// The trait is implemented on SIMD vector types rather than on the slice:
/// the implementing vector type is what the shuffle uses to hold a batch of
/// random indices, so choosing the type chooses the lane width and the number
/// of indices produced per batch.
pub trait SimdShuf {
/// Shuffle a mutable slice in place, using an SIMD implementation.
///
/// To be used in the form:
// The snippet is a usage sketch only (`rng` and `list` are not defined), so
// it is marked `ignore` to keep it from being compiled as a doctest.
/// ```rust,ignore
/// u16x8::simd_shuffle(&mut rng, &mut list);
/// ```
///
/// Use a vector of size greater than or equal to the PRNG output.
/// Smaller lane widths will likely be faster for equal vector sizes.
///
/// # Panics
///
/// If `values.len()` is larger than the maximum value of the vector's
/// lanes. (If `values.len()` is unknown, use a `u32xN` or `u64xN`
/// vector depending on [`target_pointer_width`].)
///
/// [`target_pointer_width`]: https://doc.rust-lang.org/reference/attributes.html#conditional-compilation
fn simd_shuffle<R: Rng, T>(rng: &mut R, values: &mut [T]);
}

/// Implements [`SimdShuf`] for SIMD vector types.
///
/// Two forms are accepted:
///
/// * `impl_simd_shuf!(vec, scalar)` implements the trait for the single
///   vector type `vec`, whose lanes have the integer type `scalar`.
/// * `impl_simd_shuf!(vec_a, vec_b, ...,, scalar)` applies the single form
///   to every listed vector type. Note the doubled comma before `scalar`:
///   each vector name carries its own trailing comma and one more comma
///   separates the list from the scalar.
///
/// The expansion uses `rem_shuf!`, `swap_unchecked` and `Rng`, so those must
/// be available where the macro is invoked.
macro_rules! impl_simd_shuf {
    ($vec:ident, $scalar:ident) => {
        impl SimdShuf for $vec {
            // TODO: make this adapt when too many/few elements
            // could match on values.len and use an appropriate
            // lane width, based on chosen vector width
            #[inline(always)]
            fn simd_shuffle<R: Rng, T>(rng: &mut R, values: &mut [T]) {
                // Every index into `values` has to be representable in one lane.
                assert!(
                    values.len() <= $scalar::max_value() as usize,
                    "Slice length too long for the vector's lanes",
                );

                // Create a vector to hold `$vec::lanes()` range bounds at
                // once:
                // (len, len - 1, len - 2, len - 3, ..., len - $vec::lanes() + 1)
                // The bounds depend on the runtime slice length, so they are
                // computed here rather than at compile time.
                //
                // `wrapping_sub` is deliberate: when the slice is shorter than
                // the vector, `len - vec_idx` would underflow and panic in
                // builds with overflow checks enabled. In that case the main
                // loop below runs zero times, so the wrapped lanes are never
                // read.
                // TODO: consider making this a macro
                let mut interval = $vec::default();
                for vec_idx in 0..$vec::lanes() {
                    interval = interval.replace(
                        vec_idx,
                        values.len().wrapping_sub(vec_idx) as $scalar,
                    );
                }
                // One past the next slice position to be locked in place.
                let mut slice_idx = values.len();

                // shuffle a multiple of `$vec::lanes()` slice elements
                for _ in 0..values.len() / $vec::lanes() {
                    // NOTE(review): presumably samples every lane
                    // independently from `[0, interval[lane])` -- confirm
                    // against the vector `gen_range` implementation.
                    let rand_indices = rng.gen_range($vec::splat(0), interval);

                    // swap each `rand_idx` with the next `slice_idx`
                    // TODO: could probably be optimized
                    for vec_idx in 0..$vec::lanes() {
                        slice_idx -= 1;
                        let rand_idx = rand_indices.extract(vec_idx) as usize;
                        // SAFETY: `slice_idx` was decremented from
                        // `values.len()` and this loop performs at most
                        // `values.len()` decrements, so it is in bounds.
                        // `rand_idx` is in bounds provided `gen_range`
                        // returns lane values below the matching `interval`
                        // lane, which never exceeds `values.len()` here.
                        unsafe { swap_unchecked(values, slice_idx, rand_idx); }
                    }

                    // move onto the next interval
                    interval -= $vec::lanes() as $scalar;
                }

                // shuffle the remaining elements
                // This is likely overzealous
                let remainder = values.len() % $vec::lanes();
                if remainder > 1 {
                    // `remainder - 1` swaps are still needed; pick a vector
                    // with at least that many lanes.
                    match remainder - 1 {
                        1..=2 => rem_shuf!(u16x2, remainder, rng, values, slice_idx, u16),
                        3..=4 => rem_shuf!(u16x4, remainder, rng, values, slice_idx, u16),
                        5..=8 => rem_shuf!(u16x8, remainder, rng, values, slice_idx, u16),
                        9..=16 => rem_shuf!(u16x16, remainder, rng, values, slice_idx, u16),
                        17..=32 => rem_shuf!(u16x32, remainder, rng, values, slice_idx, u16),
                        // 33 and above
                        _ => rem_shuf!(u8x64, remainder, rng, values, slice_idx, u8),
                    }
                }
            }
        }
    };

    // bulk implementation for scalar types
    ($($vec:ident,)+, $scalar:ident) => {$(
        impl_simd_shuf!($vec, $scalar);
    )+};
}

/// Shuffles the last `$left` not-yet-placed elements at the front of
/// `$values` using a single batch of random indices.
///
/// Arguments, in order: the vector type used to draw the indices, the
/// number of elements left, the RNG, the slice, the caller's mutable
/// position counter (decremented once per swap), and the lane type of the
/// vector.
macro_rules! rem_shuf {
    ($lanes_ty:ident, $left:ident, $rng:ident, $values:ident, $slice_idx:ident, $lane:ty) => {{
        // Only `$left - 1` swaps are needed, so only that many lanes get a
        // real bound. The unused lanes still have to be sampled, so they
        // keep a bound of 1 to make that sampling as cheap as possible.
        let swaps = $left - 1;
        let mut bounds = $lanes_ty::splat(1);
        for lane in 0..swaps {
            bounds = bounds.replace(lane, ($left - lane) as $lane);
        }

        let picks = $rng.gen_range($lanes_ty::splat(0), bounds);
        for lane in 0..swaps {
            $slice_idx -= 1;
            let pick = picks.extract(lane) as usize;
            unsafe { swap_unchecked($values, $slice_idx, pick); }
        }
    }};
}

// Implement `SimdShuf` for the unsigned integer vector types, one invocation
// per lane type. The doubled comma is required by the bulk arm of
// `impl_simd_shuf!`: every vector name is followed by its own comma, and a
// further comma separates the list from the lane type.
impl_simd_shuf!(u8x2, u8x4, u8x8, u8x16, u8x32, u8x64,, u8);
impl_simd_shuf!(u16x2, u16x4, u16x8, u16x16, u16x32,, u16);
impl_simd_shuf!(u32x2, u32x4, u32x8, u32x16,, u32);
impl_simd_shuf!(u64x2, u64x4, u64x8,, u64);

0 comments on commit 849e9b2

Please sign in to comment.