From 7792481d0b99518e11b8d527290fe6ab12429914 Mon Sep 17 00:00:00 2001 From: Vurich Date: Mon, 26 Feb 2018 12:54:15 +0100 Subject: [PATCH 1/2] Optimisations and convert to use crunchy --- Cargo.toml | 3 ++ src/lib.rs | 140 ++++++++++++++++++++++++----------------------------- 2 files changed, 67 insertions(+), 76 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ef151c6..35374d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,9 @@ authors = ["debris "] keywords = ["sha3", "sha-3", "keccak", "crypto", "tiny"] categories = ["cryptography", "no-std"] +[dependencies] +crunchy = "0.1.6" + [profile.dev] opt-level = 3 # Controls the --opt-level the compiler builds with debug = false # Controls whether the compiler passes `-g` diff --git a/src/lib.rs b/src/lib.rs index dc1e18d..b2af03a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,6 +37,9 @@ #![no_std] +#[macro_use] +extern crate crunchy; + const RHO: [u32; 24] = [ 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, @@ -60,84 +63,69 @@ const RC: [u64; 24] = [ 0x8000000080008081u64, 0x8000000000008080u64, 0x80000001u64, 0x8000000080008008u64 ]; -macro_rules! REPEAT4 { - ($e: expr) => ( $e; $e; $e; $e; ) -} - -macro_rules! REPEAT5 { - ($e: expr) => ( $e; $e; $e; $e; $e; ) -} - -macro_rules! REPEAT6 { - ($e: expr) => ( $e; $e; $e; $e; $e; $e; ) -} - -macro_rules! REPEAT24 { - ($e: expr, $s: expr) => ( - REPEAT6!({ $e; $s; }); - REPEAT6!({ $e; $s; }); - REPEAT6!({ $e; $s; }); - REPEAT5!({ $e; $s; }); - $e; - ) -} - -macro_rules! FOR5 { - ($v: expr, $s: expr, $e: expr) => { - $v = 0; - REPEAT4!({ - $e; - $v += $s; - }); - $e; - } -} - +#[allow(unused_assignments)] /// keccak-f[1600] pub fn keccakf(a: &mut [u64; PLEN]) { - let mut b: [u64; 5] = [0; 5]; - let mut t: u64; - let mut x: usize; - let mut y: usize; - - for i in 0..24 { - // Theta - FOR5!(x, 1, { - b[x] = 0; - FOR5!(y, 5, { - b[x] ^= a[x + y]; - }); - }); - - FOR5!(x, 1, { - FOR5!(y, 5, { - a[y + x] ^= b[(x + 4) % 5] ^ b[(x + 1) % 5].rotate_left(1); - }); - }); - - // Rho and pi - t = a[1]; - x = 0; - REPEAT24!({ - b[0] = a[PI[x]]; - a[PI[x]] = t.rotate_left(RHO[x]); - }, { - t = b[0]; - x += 1; - }); - - // Chi - FOR5!(y, 5, { - FOR5!(x, 1, { - b[x] = a[y + x]; - }); - FOR5!(x, 1, { - a[y + x] = b[x] ^ ((!b[(x + 1) % 5]) & (b[(x + 2) % 5])); - }); - }); - - // Iota - a[0] ^= RC[i]; + let mut arrays: [[u64; 5]; 24] = [[0; 5]; 24]; + + unroll! { + for i in 0..24 { + // Theta + unroll! { + for x in 0..5 { + arrays[i][x] = 0; + unroll! { + for y_count in 0..5 { + let y = y_count * 5; + arrays[i][x] ^= a[x + y]; + } + } + } + } + + unroll! { + for x in 0..5 { + unroll! { + for y_count in 0..5 { + let y = y_count * 5; + a[y + x] ^= arrays[i][(x + 4) % 5] ^ arrays[i][(x + 1) % 5].rotate_left(1); + } + } + } + } + + // Rho and pi + let mut last = a[1]; + unroll! { + for x in 0..24 { + arrays[i][0] = a[PI[x]]; + a[PI[x]] = last.rotate_left(RHO[x]); + last = arrays[i][0]; + } + } + + // Chi + unroll! { + for y_step in 0..5 { + let y = y_step * 5; + + unroll! { + for x in 0..5 { + arrays[i][x] = a[y + x]; + } + } + + unroll! { + for x in 0..5 { + a[y + x] = arrays[i][x] ^ ((!arrays[i][(x + 1) % 5]) & (arrays[i][(x + 2) % 5])); + } + } + } + }; + + // Iota + a[0] ^= RC[i]; + } } } From 662d2e27478bd05b12501b4bd0a30976add924a6 Mon Sep 17 00:00:00 2001 From: Vurich Date: Mon, 26 Feb 2018 14:00:30 +0100 Subject: [PATCH 2/2] Add comment explaining why I kept in the seemingly-useless zeroing --- src/lib.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index b2af03a..877a527 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,7 +73,14 @@ pub fn keccakf(a: &mut [u64; PLEN]) { // Theta unroll! { for x in 0..5 { + // This looks useless but it gets way slower without it. I tried using + // `mem::uninitialized` for the initialisation of `arrays` but that also makes + // it slower, although not by as much as removing this assignment. Optimisers + // are weird. Maybe a different version of LLVM will react differently, so if + // you see this comment in the future try deleting this assignment and using + // uninit above and see how it affects the benchmarks. arrays[i][x] = 0; + unroll! { for y_count in 0..5 { let y = y_count * 5;