Skip to content

Commit

Permalink
Wasm simd128
Browse files Browse the repository at this point in the history
  • Loading branch information
DoumanAsh committed Feb 17, 2024
1 parent 4fa0e31 commit 2ac14db
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 10 deletions.
37 changes: 35 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
rustup update
else
curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal --default-toolchain stable
echo ::add-path::$HOME/.cargo/bin
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
fi
#- name: Install Miri
Expand Down Expand Up @@ -76,6 +76,39 @@ jobs:
# cargo +nightly miri test --features xxh32,const_xxh32,xxh64,const_xxh64,xxh3,const_xxh3
# cargo +nightly miri test --release --features xxh32,const_xxh32,xxh64,const_xxh64,xxh3,const_xxh3

wasm-platform-test:
needs: full-test
runs-on: ubuntu-latest
container: ghcr.io/webassembly/wasi-sdk
if: github.event.pull_request.draft == false
steps:
- uses: actions/checkout@v4

- name: Install system packages
run: apt-get update && apt-get install -y curl xz-utils

- name: Install Rust Unix
run: |
if rustup --version >/dev/null 2>&1; then
rustup update
else
curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal --default-toolchain stable
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
fi
- name: Install Wasi runtime
run: |
curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v17.0.1/wasmtime-v17.0.1-x86_64-linux.tar.xz > wasmtime.tar.xz
mkdir $HOME/wasmtime
tar -xf wasmtime.tar.xz -C $HOME/wasmtime --strip-components 1
echo "$HOME/wasmtime/" >> $GITHUB_PATH
cargo install cargo-wasi
- name: Test simd128
env:
RUSTFLAGS: "-Ctarget-feature=+simd128"
run: cargo wasi test --test assert_correctness --features xxh32,const_xxh32,xxh64,const_xxh64,xxh3,const_xxh3 -- --nocapture

cross-platform-test:
needs: full-test
runs-on: ubuntu-latest
Expand All @@ -100,7 +133,7 @@ jobs:
rustup update
else
curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal --default-toolchain stable
echo ::add-path::$HOME/.cargo/bin
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
fi
- name: Install Cross
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ xxh3 = [] # Enable xxh3 implementation
const_xxh3 = [] # Enable const xxh3 implementation

[dev-dependencies]
xxhash-c-sys = "0.8.4"
getrandom = "0.2"
xxhash-c-sys = "0.8.6"

[package.metadata.docs.rs]
features = ["xxh32", "const_xxh32", "xxh64", "const_xxh64", "xxh3", "const_xxh3"]
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ Used SIMD acceleration:
- SSE2 - widely available, can be safely enabled in 99% of cases. Enabled by default in `x86_64` targets.
- AVX2;
- Neon - Enabled by default on aarch64 targets (most likely)
- Wasm SIMD128 - Has to be enabled via rust flag: `-Ctarget-feature=+simd128`

## Streaming vs One-shot

Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@
//!
//!- SSE2 - widely available, can be safely enabled in 99% of cases. Enabled by default in `x86_64` targets.
//!- AVX2;
//!- Neon - Enabled by default on aarch64 targets (most likely)
//!- Neon - Enabled by default on aarch64 targets (most likely);
//!- Wasm SIMD128 - Has to be enabled via rust flag: `-Ctarget-feature=+simd128`
//!
//!## Streaming vs One-shot
//!
Expand Down
80 changes: 74 additions & 6 deletions src/xxh3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ use crate::xxh3_common::*;
// Code is as close to original C implementation as possible
// It does make it look ugly, but it is fast and easy to update once xxhash gets new version.

#[cfg(all(any(target_feature = "sse2", target_feature = "neon"), not(target_feature = "avx2")))]
#[cfg(all(any(target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128")), not(target_feature = "avx2")))]
#[repr(align(16))]
#[derive(Clone)]
struct Acc([u64; ACC_NB]);
#[cfg(target_feature = "avx2")]
#[repr(align(32))]
#[derive(Clone)]
struct Acc([u64; ACC_NB]);
#[cfg(not(any(target_feature = "avx2", target_feature = "neon", target_feature = "sse2")))]
#[cfg(not(any(target_feature = "avx2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"), target_feature = "sse2")))]
#[repr(align(8))]
#[derive(Clone)]
struct Acc([u64; ACC_NB]);
Expand Down Expand Up @@ -141,6 +141,46 @@ fn custom_default_secret(seed: u64) -> [u8; DEFAULT_SECRET_SIZE] {
}
}

#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
fn accumulate_512_wasm(acc: &mut Acc, input: *const u8, secret: *const u8) {
const LANES: usize = ACC_NB;

use core::arch::wasm32::*;

let mut idx = 0usize;
let xacc = acc.0.as_mut_ptr() as *mut v128;

unsafe {
while idx.wrapping_add(1) < LANES / 2 {
let data_vec_1 = v128_load(input.add(idx.wrapping_mul(16)) as _);
let data_vec_2 = v128_load(input.add(idx.wrapping_add(1).wrapping_mul(16)) as _);

let key_vec_1 = v128_load(secret.add(idx.wrapping_mul(16)) as _);
let key_vec_2 = v128_load(secret.add(idx.wrapping_add(1).wrapping_mul(16)) as _);

let data_key_1 = v128_xor(data_vec_1, key_vec_1);
let data_key_2 = v128_xor(data_vec_2, key_vec_2);

let data_swap_1 = i64x2_shuffle::<1, 0>(data_vec_1, data_vec_1);
let data_swap_2 = i64x2_shuffle::<1, 0>(data_vec_2, data_vec_2);

let mixed_lo = i32x4_shuffle::<0, 2, 4, 6>(data_key_1, data_key_2);
let mixed_hi = i32x4_shuffle::<1, 3, 5, 7>(data_key_1, data_key_2);

let prod_1 = u64x2_extmul_low_u32x4(mixed_lo, mixed_hi);
let prod_2 = u64x2_extmul_high_u32x4(mixed_lo, mixed_hi);

let sum_1 = i64x2_add(prod_1, data_swap_1);
let sum_2 = i64x2_add(prod_2, data_swap_2);

xacc.add(idx).write(i64x2_add(sum_1, *xacc.add(idx)));
xacc.add(idx.wrapping_add(1)).write(i64x2_add(sum_2, *xacc.add(idx.wrapping_add(1))));

idx = idx.wrapping_add(2);
}
}
}

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
macro_rules! vld1q_u8 {
($ptr:expr) => {
Expand Down Expand Up @@ -288,7 +328,7 @@ fn accumulate_512_avx2(acc: &mut Acc, input: *const u8, secret: *const u8) {
}
}

#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon")))]
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
#[inline(always)]
fn accumulate_512_scalar(acc: &mut Acc, input: *const u8, secret: *const u8) {
for idx in 0..ACC_NB {
Expand All @@ -301,6 +341,9 @@ fn accumulate_512_scalar(acc: &mut Acc, input: *const u8, secret: *const u8) {
}

fn accumulate_512(acc: &mut Acc, input: *const u8, secret: *const u8) {
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
accumulate_512_wasm(acc, input, secret);

#[cfg(target_feature = "neon")]
accumulate_512_neon(acc, input, secret);

Expand All @@ -310,10 +353,32 @@ fn accumulate_512(acc: &mut Acc, input: *const u8, secret: *const u8) {
#[cfg(target_feature = "avx2")]
accumulate_512_avx2(acc, input, secret);

#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon")))]
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
accumulate_512_scalar(acc, input, secret);
}

#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
#[inline(always)]
fn scramble_acc_wasm(acc: &mut Acc, secret: *const u8) {
const LANES: usize = ACC_NB;

use core::arch::wasm32::*;

let xacc = acc.0.as_mut_ptr() as *mut v128;
let prime = u64x2_splat(xxh32::PRIME_1 as _);

unsafe {
for idx in 0..LANES / 2 {
let acc_vec = v128_load(xacc.add(idx) as _);
let shifted = u64x2_shr(acc_vec, 47);
let data_vec = v128_xor(acc_vec, shifted);
let key_vec = v128_load(secret.add(16usize.wrapping_mul(idx)) as _);
let mixed = v128_xor(data_vec, key_vec);
xacc.add(idx).write(i64x2_mul(mixed, prime));
}
}
}

#[cfg(target_feature = "neon")]
#[inline(always)]
fn scramble_acc_neon(acc: &mut Acc, secret: *const u8) {
Expand Down Expand Up @@ -408,7 +473,7 @@ fn scramble_acc_avx2(acc: &mut Acc, secret: *const u8) {
}
}

#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon")))]
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
#[inline(always)]
fn scramble_acc_scalar(acc: &mut Acc, secret: *const u8) {
for idx in 0..ACC_NB {
Expand All @@ -420,6 +485,9 @@ fn scramble_acc_scalar(acc: &mut Acc, secret: *const u8) {
}

fn scramble_acc(acc: &mut Acc, secret: *const u8) {
#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
scramble_acc_wasm(acc, secret);

#[cfg(target_feature = "neon")]
scramble_acc_neon(acc, secret);

Expand All @@ -429,7 +497,7 @@ fn scramble_acc(acc: &mut Acc, secret: *const u8) {
#[cfg(target_feature = "avx2")]
scramble_acc_avx2(acc, secret);

#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon")))]
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
scramble_acc_scalar(acc, secret)
}

Expand Down

0 comments on commit 2ac14db

Please sign in to comment.