diff --git a/src/bit_ops.h b/src/bit_ops.h
index 1edef6679..363dd6164 100644
--- a/src/bit_ops.h
+++ b/src/bit_ops.h
@@ -17,6 +17,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
#ifndef RVVM_BIT_OPS_H
#define RVVM_BIT_OPS_H
+#include "compiler.h"
#include "rvvm_types.h"
// Simple bit operations (sign-extend, etc) for internal usage
@@ -28,19 +29,19 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* [ext is now equal to signed lower 20 bits of val]
*/
-static inline int64_t sign_extend(uint64_t val, bitcnt_t bits)
+static forceinline int64_t sign_extend(uint64_t val, bitcnt_t bits)
{
+    // Move the low `bits` bits of val to the top of the 64-bit word, then shift
+    // back down as a signed value so the top bit of the field fills the upper bits.
+    // NOTE(review): relies on >> of a negative int64_t being an arithmetic shift
+    // (implementation-defined in C); bits == 0 would shift by 64, which is
+    // undefined - confirm callers always pass 1..64.
return ((int64_t)(val << (64 - bits))) >> (64 - bits);
}
// Generate bitmask of given size
+// Returns a value whose low `count` bits are set and all higher bits are clear.
+// NOTE(review): count == 64 would shift 1ULL by the full type width, which is
+// undefined in C - confirm callers stay below 64.
-static inline uint64_t bit_mask(bitcnt_t count)
+static forceinline uint64_t bit_mask(bitcnt_t count)
{
return (1ULL << count) - 1;
}
// Cut bits from val at given position (from lower bit)
+// Shifts val right by `pos`, then keeps only the low `bits` bits via bit_mask().
+// NOTE(review): pos >= 64 would be an out-of-range shift - presumably callers keep pos in 0..63.
-static inline uint64_t bit_cut(uint64_t val, bitcnt_t pos, bitcnt_t bits)
+static forceinline uint64_t bit_cut(uint64_t val, bitcnt_t pos, bitcnt_t bits)
{
return (val >> pos) & bit_mask(bits);
}
@@ -52,7 +53,7 @@ static inline uint64_t bit_replace(uint64_t val, bitcnt_t pos, bitcnt_t bits, ui
}
// Check if Nth bit of val is 1
+// `pos` is the bit index counted from the lowest bit (index 0); the result is
+// true when that bit of val is set.
+// NOTE(review): pos >= 64 would be an out-of-range shift - presumably callers keep pos in 0..63.
-static inline bool bit_check(uint64_t val, bitcnt_t pos)
+static forceinline bool bit_check(uint64_t val, bitcnt_t pos)
{
return (val >> pos) & 0x1;
}
@@ -73,27 +74,137 @@ static inline uint64_t bit_next_pow2(uint64_t val)
return val + 1;
}
-static inline uint32_t bit_rotl32(uint32_t val, bitcnt_t bits)
+// Rotate u32 left by `bits` positions
+// The rotate amount is reduced modulo 32, so bits == 0 (or any multiple of 32)
+// returns val unchanged instead of shifting by the full type width, which is
+// undefined behavior in C. Results for 1..31 are the same as before.
+static forceinline uint32_t bit_rotl32(uint32_t val, bitcnt_t bits)
{
-    return (val << bits) | (val >> (32 - bits));
+    return (val << (bits & 31U)) | (val >> ((32U - bits) & 31U));
}
-static inline uint64_t bit_rotl64(uint64_t val, bitcnt_t bits)
+// Rotate u64 left by `bits` positions
+// The rotate amount is reduced modulo 64, so bits == 0 (or any multiple of 64)
+// returns val unchanged instead of shifting by the full type width, which is
+// undefined behavior in C. Results for 1..63 are the same as before.
+static forceinline uint64_t bit_rotl64(uint64_t val, bitcnt_t bits)
{
-    return (val << bits) | (val >> (64 - bits));
+    return (val << (bits & 63U)) | (val >> ((64U - bits) & 63U));
}
-// Reverse bits in val (from lower bit), remaining bits are zero
-static inline uint64_t bit_reverse(uint64_t val, bitcnt_t bits)
+// Rotate u32 right by `bits` positions
+// The rotate amount is reduced modulo 32, so bits == 0 (or any multiple of 32)
+// returns val unchanged instead of shifting by the full type width, which is
+// undefined behavior in C. Results for 1..31 are the same as before.
+static forceinline uint32_t bit_rotr32(uint32_t val, bitcnt_t bits)
{
- uint64_t ret = 0;
+    return (val >> (bits & 31U)) | (val << ((32U - bits) & 31U));
+}
- for (bitcnt_t i=0; i>= 1;
- }
+// Rotate u64 right by `bits` positions
+// The rotate amount is reduced modulo 64, so bits == 0 (or any multiple of 64)
+// returns val unchanged instead of shifting by the full type width, which is
+// undefined behavior in C. Results for 1..63 are the same as before.
+static forceinline uint64_t bit_rotr64(uint64_t val, bitcnt_t bits)
+{
+    return (val >> (bits & 63U)) | (val << ((64U - bits) & 63U));
+}
+
+// Number of zero bits above the highest set bit of a u32
+// Returns 32 when val is zero. Uses __builtin_clz when the
+// GNU_BUILTIN(__builtin_clz) check passes, otherwise narrows the search
+// window by halves (16, 8, 4, 2, 1 bits).
+static inline bitcnt_t bit_clz32(uint32_t val)
+{
+    if (unlikely(val == 0)) {
+        return 32;
+    }
+#if GNU_BUILTIN(__builtin_clz)
+    return __builtin_clz(val);
+#else
+    bitcnt_t zeroes = 0;
+    for (bitcnt_t half = 16; half != 0; half >>= 1) {
+        if (val >> half) {
+            // Upper part of the window is populated: keep searching inside it
+            val >>= half;
+        } else {
+            // Upper part of the window is empty: all of it counts as leading zeroes
+            zeroes += half;
+        }
+    }
+    return zeroes + !(val & 1);
+#endif
+}
+
+// Number of zero bits above the highest set bit of a u64
+// Returns 64 when val is zero. Uses __builtin_clzll when the
+// GNU_BUILTIN(__builtin_clzll) check passes and HOST_64BIT is defined,
+// otherwise delegates to bit_clz32() on whichever 32-bit half holds the top bit.
+static inline bitcnt_t bit_clz64(uint64_t val)
+{
+    if (unlikely(val == 0)) {
+        return 64;
+    }
+#if GNU_BUILTIN(__builtin_clzll) && defined(HOST_64BIT)
+    return __builtin_clzll(val);
+#else
+    const uint32_t upper = (uint32_t)(val >> 32);
+    if (upper) {
+        return bit_clz32(upper);
+    }
+    // Upper half is empty: its 32 bits are all leading zeroes
+    return bit_clz32((uint32_t)val) + 32;
+#endif
+}
+
+// Number of zero bits below the lowest set bit of a u32
+// Returns 32 when val is zero. Uses __builtin_ctz when the
+// GNU_BUILTIN(__builtin_ctz) check passes, otherwise strips empty low
+// groups of 16, 8, 4 and 2 bits and then inspects the last remaining bit.
+static inline bitcnt_t bit_ctz32(uint32_t val)
+{
+    if (unlikely(val == 0)) {
+        return 32;
+    }
+#if GNU_BUILTIN(__builtin_ctz)
+    return __builtin_ctz(val);
+#else
+    bitcnt_t zeroes = 0;
+    for (bitcnt_t width = 16; width > 1; width >>= 1) {
+        if (!(val & ((1U << width) - 1))) {
+            // Low `width` bits are all clear: drop them and account for them
+            val >>= width;
+            zeroes += width;
+        }
+    }
+    return zeroes + !(val & 0x1);
+#endif
+}
+
+// Number of zero bits below the lowest set bit of a u64
+// Returns 64 when val is zero. Uses __builtin_ctzll when the
+// GNU_BUILTIN(__builtin_ctzll) check passes and HOST_64BIT is defined,
+// otherwise delegates to bit_ctz32() on whichever 32-bit half holds the lowest bit.
+static inline bitcnt_t bit_ctz64(uint64_t val)
+{
+    if (unlikely(val == 0)) {
+        return 64;
+    }
+#if GNU_BUILTIN(__builtin_ctzll) && defined(HOST_64BIT)
+    return __builtin_ctzll(val);
+#else
+    const uint32_t lower = (uint32_t)val;
+    if (lower) {
+        return bit_ctz32(lower);
+    }
+    // Lower half is empty: its 32 bits are all trailing zeroes
+    return bit_ctz32((uint32_t)(val >> 32)) + 32;
+#endif
+}
+// Number of set bits in a u32
+// Uses __builtin_popcount when the GNU_BUILTIN(__builtin_popcount) check
+// passes, otherwise sums neighbouring bit fields in five doubling passes.
+static inline bitcnt_t bit_popcnt32(uint32_t val)
+{
+#if GNU_BUILTIN(__builtin_popcount)
+    return __builtin_popcount(val);
+#else
+    // Mask selecting the low field of every field pair, per pass (widths 1, 2, 4, 8, 16)
+    static const uint32_t field_mask[5] = {
+        0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF,
+    };
+    for (unsigned pass = 0; pass < 5; ++pass) {
+        const uint32_t mask = field_mask[pass];
+        // Add each field to its upper neighbour; the sums land in fields twice as wide
+        val = (val & mask) + ((val >> (1U << pass)) & mask);
+    }
+    return val;
+#endif
+}
+
+// Number of set bits in a u64
+// Uses __builtin_popcountll when the GNU_BUILTIN(__builtin_popcountll) check
+// passes and HOST_64BIT is defined, otherwise adds the bit_popcnt32() counts
+// of the two 32-bit halves.
+static inline bitcnt_t bit_popcnt64(uint64_t val)
+{
+#if GNU_BUILTIN(__builtin_popcountll) && defined(HOST_64BIT)
+    return __builtin_popcountll(val);
+#else
+    const uint32_t lo = (uint32_t)val;
+    const uint32_t hi = (uint32_t)(val >> 32);
+    return bit_popcnt32(lo) + bit_popcnt32(hi);
+#endif
+}
+
+// Bitwise OR-combine with byte granularity, for orc.b instruction emulation
+// Every byte of val that has at least one bit set becomes 0xFF in the result;
+// bytes that are zero stay 0x00.
+static inline uint64_t bit_orc_b(uint64_t val)
+{
+    uint64_t ret = 0;
+    // Walk a 0xFF lane mask upwards one byte at a time; it shifts out to 0 after the top byte
+    for (uint64_t lane = 0xFFULL; lane != 0; lane <<= 8) {
+        if (val & lane) ret |= lane;
+    }
return ret;
}