Skip to content

Commit

Permalink
Remove shr 3
Browse files (browse the repository at this point in the history)
  • Loading branch information
EbbDrop committed Dec 26, 2024
1 parent 82e64ef commit 4790430
Showing 1 changed file with 1 addition and 13 deletions.
14 changes: 1 addition & 13 deletions src/day25.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -78,28 +78,24 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*2]",
"vpand {vt}, {vt}, {msb}",
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*1]",
"vpand {vt}, {vt}, {msb}",
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*0]",
"vpand {vt}, {vt}, {msb}",
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"cmp {i}, 16",
"jae 4b", // Loop
Expand All @@ -115,7 +111,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"cmp {i}, 4",
"jae 5b", // Loop
Expand All @@ -130,7 +125,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
"vpmovmskb {t}, {vt}",
"and {t:e}, dword ptr [{lut} + 4*{i}]",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"2:",
d = in(ymm_reg) _mm256_set1_epi64x(d as i64),
Expand Down Expand Up @@ -162,28 +156,24 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*2]",
"vpand {vt}, {vt}, {msb}",
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*1]",
"vpand {vt}, {vt}, {msb}",
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"vpaddq {vt}, {d}, ymmword ptr [{os} + 8*{i} + 32*0]",
"vpand {vt}, {vt}, {msb}",
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"cmp {i}, 16",
"jae 4b", // Loop
Expand All @@ -199,7 +189,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
"vpcmpeqq {vt}, {vt}, {zero}",
"vpmovmskb {t}, {vt}",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"cmp {i}, 4",
"jae 5b", // Loop
Expand All @@ -214,7 +203,6 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
"vpmovmskb {t}, {vt}",
"and {t:e}, dword ptr [{lut} + 4*{i}]",
"popcnt {t}, {t}",
"shr {t}, 3",
"add {sum},{t}",
"2:",
d = in(ymm_reg) _mm256_set1_epi64x(d as i64),
Expand All @@ -236,7 +224,7 @@ unsafe fn part1_inner(s: &[u8]) -> u64 {
i += DS;
}

sum
sum / 8
}

#[cfg(test)]
Expand Down

0 comments on commit 4790430

Please sign in to comment.