From 7d9833f5beac482a8320ec1769028dd984d4c25a Mon Sep 17 00:00:00 2001 From: Jef Date: Thu, 17 May 2018 14:07:24 +0200 Subject: [PATCH 1/2] Reroll outer loop --- src/lib.rs | 96 ++++++++++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 49 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 877a527..a90e0e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,71 +68,69 @@ const RC: [u64; 24] = [ pub fn keccakf(a: &mut [u64; PLEN]) { let mut arrays: [[u64; 5]; 24] = [[0; 5]; 24]; - unroll! { - for i in 0..24 { - // Theta - unroll! { - for x in 0..5 { - // This looks useless but it gets way slower without it. I tried using - // `mem::uninitialized` for the initialisation of `arrays` but that also makes - // it slower, although not by as much as removing this assignment. Optimisers - // are weird. Maybe a different version of LLVM will react differently, so if - // you see this comment in the future try deleting this assignment and using - // uninit above and see how it affects the benchmarks. - arrays[i][x] = 0; - - unroll! { - for y_count in 0..5 { - let y = y_count * 5; - arrays[i][x] ^= a[x + y]; - } + for i in 0..24 { + // Theta + unroll! { + for x in 0..5 { + // This looks useless but it gets way slower without it. I tried using + // `mem::uninitialized` for the initialisation of `arrays` but that also makes + // it slower, although not by as much as removing this assignment. Optimisers + // are weird. Maybe a different version of LLVM will react differently, so if + // you see this comment in the future try deleting this assignment and using + // uninit above and see how it affects the benchmarks. + arrays[i][x] = 0; + + unroll! { + for y_count in 0..5 { + let y = y_count * 5; + arrays[i][x] ^= a[x + y]; } } } + } - unroll! { - for x in 0..5 { - unroll! { - for y_count in 0..5 { - let y = y_count * 5; - a[y + x] ^= arrays[i][(x + 4) % 5] ^ arrays[i][(x + 1) % 5].rotate_left(1); - } + unroll! { + for x in 0..5 { + unroll! 
{ + for y_count in 0..5 { + let y = y_count * 5; + a[y + x] ^= arrays[i][(x + 4) % 5] ^ arrays[i][(x + 1) % 5].rotate_left(1); } } } + } - // Rho and pi - let mut last = a[1]; - unroll! { - for x in 0..24 { - arrays[i][0] = a[PI[x]]; - a[PI[x]] = last.rotate_left(RHO[x]); - last = arrays[i][0]; - } + // Rho and pi + let mut last = a[1]; + unroll! { + for x in 0..24 { + arrays[i][0] = a[PI[x]]; + a[PI[x]] = last.rotate_left(RHO[x]); + last = arrays[i][0]; } + } - // Chi - unroll! { - for y_step in 0..5 { - let y = y_step * 5; + // Chi + unroll! { + for y_step in 0..5 { + let y = y_step * 5; - unroll! { - for x in 0..5 { - arrays[i][x] = a[y + x]; - } + unroll! { + for x in 0..5 { + arrays[i][x] = a[y + x]; } + } - unroll! { - for x in 0..5 { - a[y + x] = arrays[i][x] ^ ((!arrays[i][(x + 1) % 5]) & (arrays[i][(x + 2) % 5])); - } + unroll! { + for x in 0..5 { + a[y + x] = arrays[i][x] ^ ((!arrays[i][(x + 1) % 5]) & (arrays[i][(x + 2) % 5])); } } - }; + } + }; - // Iota - a[0] ^= RC[i]; - } + // Iota + a[0] ^= RC[i]; } } From 30622a506659f4d2de4ab3d4f5be73a56d320e15 Mon Sep 17 00:00:00 2001 From: Jef Date: Thu, 17 May 2018 16:57:10 +0200 Subject: [PATCH 2/2] Remove hack no longer needed after rerolling loop --- src/lib.rs | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a90e0e8..47099f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,24 +66,16 @@ const RC: [u64; 24] = [ #[allow(unused_assignments)] /// keccak-f[1600] pub fn keccakf(a: &mut [u64; PLEN]) { - let mut arrays: [[u64; 5]; 24] = [[0; 5]; 24]; - for i in 0..24 { + let mut array: [u64; 5] = [0; 5]; + // Theta unroll! { for x in 0..5 { - // This looks useless but it gets way slower without it. I tried using - // `mem::uninitialized` for the initialisation of `arrays` but that also makes - // it slower, although not by as much as removing this assignment. Optimisers - // are weird. 
Maybe a different version of LLVM will react differently, so if - // you see this comment in the future try deleting this assignment and using - // uninit above and see how it affects the benchmarks. - arrays[i][x] = 0; - unroll! { for y_count in 0..5 { let y = y_count * 5; - arrays[i][x] ^= a[x + y]; + array[x] ^= a[x + y]; } } } @@ -94,7 +86,7 @@ pub fn keccakf(a: &mut [u64; PLEN]) { unroll! { for y_count in 0..5 { let y = y_count * 5; - a[y + x] ^= arrays[i][(x + 4) % 5] ^ arrays[i][(x + 1) % 5].rotate_left(1); + a[y + x] ^= array[(x + 4) % 5] ^ array[(x + 1) % 5].rotate_left(1); } } } @@ -104,9 +96,9 @@ pub fn keccakf(a: &mut [u64; PLEN]) { let mut last = a[1]; unroll! { for x in 0..24 { - arrays[i][0] = a[PI[x]]; + array[0] = a[PI[x]]; a[PI[x]] = last.rotate_left(RHO[x]); - last = arrays[i][0]; + last = array[0]; } } @@ -117,13 +109,13 @@ pub fn keccakf(a: &mut [u64; PLEN]) { unroll! { for x in 0..5 { - arrays[i][x] = a[y + x]; + array[x] = a[y + x]; } } unroll! { for x in 0..5 { - a[y + x] = arrays[i][x] ^ ((!arrays[i][(x + 1) % 5]) & (arrays[i][(x + 2) % 5])); + a[y + x] = array[x] ^ ((!array[(x + 1) % 5]) & (array[(x + 2) % 5])); } } }