Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Division improvements #99

Merged
merged 10 commits into from
Jun 13, 2019
2 changes: 1 addition & 1 deletion include/intx/int128.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ inline std::string to_string(uint<N> x, int base = 10)
while (x != 0)
{
// TODO: Use constexpr udivrem_1?
const auto res = udivrem(x, base);
const auto res = udivrem(x, uint<N>{base});
const auto d = int(res.rem);
const auto c = d < 10 ? '0' + d : 'a' + d - 10;
s.push_back(char(c));
Expand Down
5 changes: 3 additions & 2 deletions include/intx/intx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ struct uint
/// The 2x smaller type.
using half_type = uint<N / 2>;

static constexpr auto num_bits = N;
static constexpr auto num_words = N / 8 / sizeof(word_type);

half_type lo = 0;
Expand Down Expand Up @@ -729,8 +730,8 @@ inline typename std::enable_if<sizeof(Word) < sizeof(Int), unsigned>::type count
return h != 0 ? h + (num_words / 2) : l;
}

div_result<uint256> udivrem(const uint256& u, const uint256& v) noexcept;
div_result<uint512> udivrem(const uint512& x, const uint512& y) noexcept;
template <unsigned N>
div_result<uint<N>> udivrem(const uint<N>& u, const uint<N>& v) noexcept;

template <unsigned N>
constexpr div_result<uint<N>> sdivrem(const uint<N>& u, const uint<N>& v) noexcept
Expand Down
124 changes: 62 additions & 62 deletions lib/intx/div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,69 +8,54 @@ namespace intx
{
namespace
{
union uint512_words64
/// Divides arbitrary long unsigned integer by 64-bit unsigned integer (1 word).
/// @param u The array of a normalized numerator words. It will contain the quotient after
/// execution.
/// @param m The number of numerator words BEFORE normalization.
/// @param d The normalized denominator.
/// @return The remainder.
inline uint64_t udivrem_by1(uint64_t u[], int m, uint64_t d) noexcept
{
using word_type = uint64_t;
const auto v = reciprocal_2by1(d);

const uint512 number;
word_type words[uint512::num_words];
auto r = u[m]; // Set the top word as remainder.
u[m] = 0; // Reset the word being a part of the result quotient.

constexpr explicit uint512_words64(uint512 x = {}) noexcept : number{x} {}

word_type& operator[](size_t index) { return words[index]; }
};

inline div_result<uint512> udivrem_by1(const normalized_div_args& na) noexcept
{
auto d = na.denominator[0];
auto v = reciprocal_2by1(d);

auto q = uint512_words64{};

auto x = div_result<decltype(q)::word_type>{0, na.numerator[uint512::num_words]};

// OPT: Skip leading zero words.
for (int j = uint512::num_words - 1; j >= 0; --j)
for (int j = m - 1; j >= 0; --j)
{
x = udivrem_2by1({x.rem, na.numerator[j]}, d, v);
q[j] = x.quot;
const auto x = udivrem_2by1({r, u[j]}, d, v);
u[j] = x.quot;
r = x.rem;
}

return {q.number, x.rem >> na.shift};
return r;
}

inline div_result<uint512> udivrem_by2(const normalized_div_args& na) noexcept
/// Divides arbitrary long unsigned integer by 128-bit unsigned integer (2 words).
/// @param u The array of a normalized numerator words. It will contain the quotient after
/// execution.
/// @param m The number of numerator words BEFORE normalization.
/// @param d The normalized denominator.
/// @return The remainder.
inline uint128 udivrem_by2(uint64_t u[], int m, uint128 d) noexcept
{
auto d = uint128{na.denominator[1], na.denominator[0]};
auto v = reciprocal_3by2(d);

auto q = uint512_words64{};
constexpr auto num_words = uint512::num_words;
const auto v = reciprocal_3by2(d);

auto r = uint128{na.numerator[num_words], na.numerator[num_words - 1]};
auto r = uint128{u[m], u[m - 1]}; // Set the 2 top words as remainder.
u[m] = u[m - 1] = 0; // Reset these words being a part of the result quotient.

// OPT: Skip leading zero words.
for (int j = num_words - 2; j >= 0; --j)
for (int j = m - 2; j >= 0; --j)
{
auto res = udivrem_3by2(r.hi, r.lo, na.numerator[j], d, v);
q[j] = res.quot.lo;
r = res.rem;
const auto x = udivrem_3by2(r.hi, r.lo, u[j], d, v);
u[j] = x.quot.lo;
r = x.rem;
}

return {q.number, r >> na.shift};
return r;
}

div_result<uint512> udivrem_knuth(normalized_div_args& na) noexcept
void udivrem_knuth(uint64_t q[], uint64_t un[], int m, const uint64_t vn[], int n) noexcept
{
auto n = na.num_denominator_words;
auto m = na.num_numerator_words;

const auto& vn = na.denominator;
auto& un = na.numerator; // Will be modified.

uint512_words64 q;
uint512_words64 r;

const auto divisor = uint128{vn[n - 1], vn[n - 2]};
const auto reciprocal = reciprocal_2by1(divisor.hi);
for (int j = m - n; j >= 0; --j)
Expand Down Expand Up @@ -145,37 +130,52 @@ div_result<uint512> udivrem_knuth(normalized_div_args& na) noexcept
// un[j+n] += k;
}

// OPT: We can avoid allocating q, un can re used to store quotient.
q[j] = qhat; // Store quotient digit.
}

for (int i = 0; i < n; ++i)
r[i] = na.shift ? (un[i] >> na.shift) | (un[i + 1] << (64 - na.shift)) : un[i];

return {q.number, r.number};
}
} // namespace

div_result<uint256> udivrem(const uint256& u, const uint256& v) noexcept
{
auto x = udivrem(uint512{u}, uint512{v});
return {x.quot.lo, x.rem.lo};
}
} // namespace

div_result<uint512> udivrem(const uint512& u, const uint512& v) noexcept
template <unsigned N>
div_result<uint<N>> udivrem(const uint<N>& u, const uint<N>& v) noexcept
{
auto na = normalize(u, v);

if (na.num_denominator_words > na.num_numerator_words)
return {0, u};

if (na.num_denominator_words == 1)
return udivrem_by1(na);
{
auto r = udivrem_by1(
as_words(na.numerator), na.num_numerator_words, as_words(na.denominator)[0]);
return {na.numerator, r >> na.shift};
}

if (na.num_denominator_words == 2)
return udivrem_by2(na);
{
auto d = as_words(na.denominator);
auto r = udivrem_by2(as_words(na.numerator), na.num_numerator_words, {d[1], d[0]});
return {na.numerator, r >> na.shift};
}

return udivrem_knuth(na);
const auto n = na.num_denominator_words;
const auto m = na.num_numerator_words;

auto un = as_words(na.numerator); // Will be modified.

uint<N> q;
uint<N> r;
auto rw = as_words(r);

udivrem_knuth(as_words(q), &un[0], m, as_words(na.denominator), n);

for (int i = 0; i < n; ++i)
rw[i] = na.shift ? (un[i] >> na.shift) | (un[i + 1] << (64 - na.shift)) : un[i];

return {q, r};
}

template div_result<uint256> udivrem(const uint256& u, const uint256& v) noexcept;
template div_result<uint512> udivrem(const uint512& u, const uint512& v) noexcept;

} // namespace intx
29 changes: 16 additions & 13 deletions lib/intx/div.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,30 @@

namespace intx
{
template <unsigned N>
struct normalized_div_args
{
using word_type = uint64_t;

std::array<word_type, sizeof(uint512) / sizeof(word_type) + 1> numerator;
std::array<word_type, sizeof(uint512) / sizeof(word_type)> denominator;
int num_numerator_words;
uint<N> denominator;
uint<N> numerator;
typename uint<N>::word_type numerator_ex;
int num_denominator_words;
int num_numerator_words;
int shift;
};

inline normalized_div_args normalize(const uint512& numerator, const uint512& denominator) noexcept
template <typename IntT>
inline normalized_div_args<IntT::num_bits> normalize(
const IntT& numerator, const IntT& denominator) noexcept
{
static constexpr auto num_words = int{sizeof(uint512) / sizeof(normalized_div_args::word_type)};
// FIXME: Make the implementation type independent
static constexpr auto num_words = IntT::num_words;

auto* u = as_words(numerator);
auto* v = as_words(denominator);

normalized_div_args na;
auto* un = &na.numerator[0];
auto* vn = &na.denominator[0];
normalized_div_args<IntT::num_bits> na;
auto* un = as_words(na.numerator);
auto* vn = as_words(na.denominator);

auto& m = na.num_numerator_words;
for (m = num_words; m > 0 && u[m - 1] == 0; --m)
Expand All @@ -52,9 +55,9 @@ inline normalized_div_args normalize(const uint512& numerator, const uint512& de
}
else
{
un[num_words] = 0;
std::memcpy(un, u, sizeof(numerator));
std::memcpy(vn, v, sizeof(denominator));
na.numerator_ex = 0;
na.numerator = numerator;
na.denominator = denominator;
}

return na;
Expand Down
2 changes: 1 addition & 1 deletion test/benchmarks/bench_div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ inline uint64_t udiv_by_reciprocal(uint64_t uu, uint64_t du) noexcept
}


template <decltype(normalize) NormalizeFn>
template <decltype(normalize<uint512>) NormalizeFn>
static void div_normalize(benchmark::State& state)
{
auto u = uint512{{48882153453, 100324254353}, {4343242153453, 1324254353}};
Expand Down
44 changes: 12 additions & 32 deletions test/unittests/test_div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,59 +17,39 @@ TEST(div, normalize)
EXPECT_EQ(na.shift, 63);
EXPECT_EQ(na.num_denominator_words, 1);
EXPECT_EQ(na.num_numerator_words, 0);
EXPECT_EQ(na.numerator[0], 0);
EXPECT_EQ(na.numerator[1], 0);
EXPECT_EQ(na.numerator[8], 0);
EXPECT_EQ(na.denominator[0], uint64_t{1} << 63);
EXPECT_EQ(na.denominator[1], 0);
EXPECT_EQ(na.numerator, 0);
EXPECT_EQ(na.numerator_ex, 0);
EXPECT_EQ(na.denominator, v << 63);

u = uint512{1414, 1313};
v = uint512{12, 1212};
na = normalize(u, v);
EXPECT_EQ(na.shift, 60);
EXPECT_EQ(na.num_denominator_words, 5);
EXPECT_EQ(na.num_numerator_words, 5);
EXPECT_EQ(na.numerator[0], uint64_t{1313} << 60);
EXPECT_EQ(na.numerator[1], uint64_t{1313} >> (64 - 60));
EXPECT_EQ(na.numerator[2], 0);
EXPECT_EQ(na.numerator[4], uint64_t{1414} << 60);
EXPECT_EQ(na.numerator[na.num_numerator_words], uint64_t{1414} >> (64 - 60));
EXPECT_EQ(na.denominator[0], uint64_t{1212} << 60);
EXPECT_EQ(na.denominator[1], uint64_t{1212} >> (64 - 60));
EXPECT_EQ(na.denominator[2], 0);
EXPECT_EQ(na.denominator[4], uint64_t{12} << 60);
EXPECT_EQ(na.denominator[5], 0);
EXPECT_EQ(na.numerator, u << 60);
EXPECT_EQ(na.numerator_ex, 0);
EXPECT_EQ(na.denominator, v << 60);

u = uint512{3} << 510;
v = uint256{0xffffffffffffffff, 1};
na = normalize(u, v);
EXPECT_EQ(na.shift, 0);
EXPECT_EQ(na.num_denominator_words, 3);
EXPECT_EQ(na.num_numerator_words, 8);
EXPECT_EQ(na.numerator[0], 0);
EXPECT_EQ(na.numerator[1], 0);
EXPECT_EQ(na.numerator[2], 0);
EXPECT_EQ(na.numerator[4], 0);
EXPECT_EQ(na.numerator[7], uint64_t{3} << 62);
EXPECT_EQ(na.numerator[8], 0);
EXPECT_EQ(na.denominator[0], 1);
EXPECT_EQ(na.denominator[1], 0);
EXPECT_EQ(na.denominator[2], 0xffffffffffffffff);
EXPECT_EQ(na.denominator[3], 0);
EXPECT_EQ(na.numerator, u);
EXPECT_EQ(na.numerator_ex, 0);
EXPECT_EQ(na.denominator, v);

u = uint512{7} << 509;
v = uint256{0x3fffffffffffffff, 1};
na = normalize(u, v);
EXPECT_EQ(na.shift, 2);
EXPECT_EQ(na.num_denominator_words, 3);
EXPECT_EQ(na.num_numerator_words, 8);
EXPECT_EQ(na.numerator[6], 0);
EXPECT_EQ(na.numerator[7], uint64_t{1} << 63);
EXPECT_EQ(na.numerator[8], 3);
EXPECT_EQ(na.denominator[0], 4);
EXPECT_EQ(na.denominator[1], 0);
EXPECT_EQ(na.denominator[2], 0xfffffffffffffffc);
EXPECT_EQ(na.denominator[3], 0);
EXPECT_EQ(na.numerator, u << 2);
EXPECT_EQ(na.numerator_ex, 3);
EXPECT_EQ(na.denominator, v << 2);
}

template <typename Int>
Expand Down