From 479043070b00e0a41871c25a97d53cf17b36a508 Mon Sep 17 00:00:00 2001 From: EbbDrop Date: Thu, 26 Dec 2024 16:55:23 +0100 Subject: [PATCH] Remove shr 3 --- src/day25.rs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/day25.rs b/src/day25.rs index 446e2de..688d61e 100644 --- a/src/day25.rs +++ b/src/day25.rs @@ -78,28 +78,24 @@ unsafe fn part1_inner(s: &[u8]) -> u64 { "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*2]", "vpand {vt}, {vt}, {msb}", "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*1]", "vpand {vt}, {vt}, {msb}", "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*0]", "vpand {vt}, {vt}, {msb}", "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "cmp {i}, 16", "jae 4b", // Loop @@ -115,7 +111,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 { "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "cmp {i}, 4", "jae 5b", // Loop @@ -130,7 +125,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 { "vpmovmskb {t}, {vt}", "and {t:e}, dword ptr [{lut} + 4*{i}]", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "2:", d = in(ymm_reg) _mm256_set1_epi64x(d as i64), @@ -162,28 +156,24 @@ unsafe fn part1_inner(s: &[u8]) -> u64 { "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*2]", "vpand {vt}, {vt}, {msb}", "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*1]", "vpand {vt}, {vt}, {msb}", "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*0]", "vpand {vt}, {vt}, {msb}", "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "cmp {i}, 16", "jae 4b", // Loop @@ -199,7 +189,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 { "vpcmpeqq {vt}, {vt}, {zero}", "vpmovmskb {t}, {vt}", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "cmp {i}, 4", "jae 5b", // Loop @@ -214,7 +203,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 { "vpmovmskb {t}, {vt}", "and {t:e}, dword ptr [{lut} + 4*{i}]", "popcnt {t}, {t}", - "shr {t}, 3", "add {sum},{t}", "2:", d = in(ymm_reg) _mm256_set1_epi64x(d as i64), @@ -236,7 +224,7 @@ unsafe fn part1_inner(s: &[u8]) -> u64 { i += DS; } - sum + sum / 8 } #[cfg(test)]