Skip to content

Commit

Permalink
Merge pull request #99 from chfast/div
Browse files Browse the repository at this point in the history
Division improvements
  • Loading branch information
chfast authored Jun 13, 2019
2 parents 3359f3d + 044fa52 commit 80c527d
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 111 deletions.
2 changes: 1 addition & 1 deletion include/intx/int128.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ inline std::string to_string(uint<N> x, int base = 10)
while (x != 0)
{
// TODO: Use constexpr udivrem_1?
const auto res = udivrem(x, base);
const auto res = udivrem(x, uint<N>{base});
const auto d = int(res.rem);
const auto c = d < 10 ? '0' + d : 'a' + d - 10;
s.push_back(char(c));
Expand Down
5 changes: 3 additions & 2 deletions include/intx/intx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ struct uint
/// The 2x smaller type.
using half_type = uint<N / 2>;

static constexpr auto num_bits = N;
static constexpr auto num_words = N / 8 / sizeof(word_type);

half_type lo = 0;
Expand Down Expand Up @@ -729,8 +730,8 @@ inline typename std::enable_if<sizeof(Word) < sizeof(Int), unsigned>::type count
return h != 0 ? h + (num_words / 2) : l;
}

div_result<uint256> udivrem(const uint256& u, const uint256& v) noexcept;
div_result<uint512> udivrem(const uint512& x, const uint512& y) noexcept;
template <unsigned N>
div_result<uint<N>> udivrem(const uint<N>& u, const uint<N>& v) noexcept;

template <unsigned N>
constexpr div_result<uint<N>> sdivrem(const uint<N>& u, const uint<N>& v) noexcept
Expand Down
124 changes: 62 additions & 62 deletions lib/intx/div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,69 +8,54 @@ namespace intx
{
namespace
{
union uint512_words64
/// Divides arbitrary long unsigned integer by 64-bit unsigned integer (1 word).
/// @param u The array of a normalized numerator words. It will contain the quotient after
/// execution.
/// @param m The number of numerator words BEFORE normalization.
/// @param d The normalized denominator.
/// @return The remainder.
inline uint64_t udivrem_by1(uint64_t u[], int m, uint64_t d) noexcept
{
using word_type = uint64_t;
const auto v = reciprocal_2by1(d);

const uint512 number;
word_type words[uint512::num_words];
auto r = u[m]; // Set the top word as remainder.
u[m] = 0; // Reset the word being a part of the result quotient.

constexpr explicit uint512_words64(uint512 x = {}) noexcept : number{x} {}

word_type& operator[](size_t index) { return words[index]; }
};

inline div_result<uint512> udivrem_by1(const normalized_div_args& na) noexcept
{
auto d = na.denominator[0];
auto v = reciprocal_2by1(d);

auto q = uint512_words64{};

auto x = div_result<decltype(q)::word_type>{0, na.numerator[uint512::num_words]};

// OPT: Skip leading zero words.
for (int j = uint512::num_words - 1; j >= 0; --j)
for (int j = m - 1; j >= 0; --j)
{
x = udivrem_2by1({x.rem, na.numerator[j]}, d, v);
q[j] = x.quot;
const auto x = udivrem_2by1({r, u[j]}, d, v);
u[j] = x.quot;
r = x.rem;
}

return {q.number, x.rem >> na.shift};
return r;
}

inline div_result<uint512> udivrem_by2(const normalized_div_args& na) noexcept
/// Divides arbitrary long unsigned integer by 128-bit unsigned integer (2 words).
/// @param u The array of a normalized numerator words. It will contain the quotient after
/// execution.
/// @param m The number of numerator words BEFORE normalization.
/// @param d The normalized denominator.
/// @return The remainder.
inline uint128 udivrem_by2(uint64_t u[], int m, uint128 d) noexcept
{
auto d = uint128{na.denominator[1], na.denominator[0]};
auto v = reciprocal_3by2(d);

auto q = uint512_words64{};
constexpr auto num_words = uint512::num_words;
const auto v = reciprocal_3by2(d);

auto r = uint128{na.numerator[num_words], na.numerator[num_words - 1]};
auto r = uint128{u[m], u[m - 1]}; // Set the 2 top words as remainder.
u[m] = u[m - 1] = 0; // Reset these words being a part of the result quotient.

// OPT: Skip leading zero words.
for (int j = num_words - 2; j >= 0; --j)
for (int j = m - 2; j >= 0; --j)
{
auto res = udivrem_3by2(r.hi, r.lo, na.numerator[j], d, v);
q[j] = res.quot.lo;
r = res.rem;
const auto x = udivrem_3by2(r.hi, r.lo, u[j], d, v);
u[j] = x.quot.lo;
r = x.rem;
}

return {q.number, r >> na.shift};
return r;
}

div_result<uint512> udivrem_knuth(normalized_div_args& na) noexcept
void udivrem_knuth(uint64_t q[], uint64_t un[], int m, const uint64_t vn[], int n) noexcept
{
auto n = na.num_denominator_words;
auto m = na.num_numerator_words;

const auto& vn = na.denominator;
auto& un = na.numerator; // Will be modified.

uint512_words64 q;
uint512_words64 r;

const auto divisor = uint128{vn[n - 1], vn[n - 2]};
const auto reciprocal = reciprocal_2by1(divisor.hi);
for (int j = m - n; j >= 0; --j)
Expand Down Expand Up @@ -145,37 +130,52 @@ div_result<uint512> udivrem_knuth(normalized_div_args& na) noexcept
// un[j+n] += k;
}

// OPT: We can avoid allocating q, un can re used to store quotient.
q[j] = qhat; // Store quotient digit.
}

for (int i = 0; i < n; ++i)
r[i] = na.shift ? (un[i] >> na.shift) | (un[i + 1] << (64 - na.shift)) : un[i];

return {q.number, r.number};
}
} // namespace

div_result<uint256> udivrem(const uint256& u, const uint256& v) noexcept
{
auto x = udivrem(uint512{u}, uint512{v});
return {x.quot.lo, x.rem.lo};
}
} // namespace

div_result<uint512> udivrem(const uint512& u, const uint512& v) noexcept
template <unsigned N>
div_result<uint<N>> udivrem(const uint<N>& u, const uint<N>& v) noexcept
{
auto na = normalize(u, v);

if (na.num_denominator_words > na.num_numerator_words)
return {0, u};

if (na.num_denominator_words == 1)
return udivrem_by1(na);
{
auto r = udivrem_by1(
as_words(na.numerator), na.num_numerator_words, as_words(na.denominator)[0]);
return {na.numerator, r >> na.shift};
}

if (na.num_denominator_words == 2)
return udivrem_by2(na);
{
auto d = as_words(na.denominator);
auto r = udivrem_by2(as_words(na.numerator), na.num_numerator_words, {d[1], d[0]});
return {na.numerator, r >> na.shift};
}

return udivrem_knuth(na);
const auto n = na.num_denominator_words;
const auto m = na.num_numerator_words;

auto un = as_words(na.numerator); // Will be modified.

uint<N> q;
uint<N> r;
auto rw = as_words(r);

udivrem_knuth(as_words(q), &un[0], m, as_words(na.denominator), n);

for (int i = 0; i < n; ++i)
rw[i] = na.shift ? (un[i] >> na.shift) | (un[i + 1] << (64 - na.shift)) : un[i];

return {q, r};
}

template div_result<uint256> udivrem(const uint256& u, const uint256& v) noexcept;
template div_result<uint512> udivrem(const uint512& u, const uint512& v) noexcept;

} // namespace intx
29 changes: 16 additions & 13 deletions lib/intx/div.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,30 @@

namespace intx
{
template <unsigned N>
struct normalized_div_args
{
using word_type = uint64_t;

std::array<word_type, sizeof(uint512) / sizeof(word_type) + 1> numerator;
std::array<word_type, sizeof(uint512) / sizeof(word_type)> denominator;
int num_numerator_words;
uint<N> denominator;
uint<N> numerator;
typename uint<N>::word_type numerator_ex;
int num_denominator_words;
int num_numerator_words;
int shift;
};

inline normalized_div_args normalize(const uint512& numerator, const uint512& denominator) noexcept
template <typename IntT>
inline normalized_div_args<IntT::num_bits> normalize(
const IntT& numerator, const IntT& denominator) noexcept
{
static constexpr auto num_words = int{sizeof(uint512) / sizeof(normalized_div_args::word_type)};
// FIXME: Make the implementation type independent
static constexpr auto num_words = IntT::num_words;

auto* u = as_words(numerator);
auto* v = as_words(denominator);

normalized_div_args na;
auto* un = &na.numerator[0];
auto* vn = &na.denominator[0];
normalized_div_args<IntT::num_bits> na;
auto* un = as_words(na.numerator);
auto* vn = as_words(na.denominator);

auto& m = na.num_numerator_words;
for (m = num_words; m > 0 && u[m - 1] == 0; --m)
Expand All @@ -52,9 +55,9 @@ inline normalized_div_args normalize(const uint512& numerator, const uint512& de
}
else
{
un[num_words] = 0;
std::memcpy(un, u, sizeof(numerator));
std::memcpy(vn, v, sizeof(denominator));
na.numerator_ex = 0;
na.numerator = numerator;
na.denominator = denominator;
}

return na;
Expand Down
2 changes: 1 addition & 1 deletion test/benchmarks/bench_div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ inline uint64_t udiv_by_reciprocal(uint64_t uu, uint64_t du) noexcept
}


template <decltype(normalize) NormalizeFn>
template <decltype(normalize<uint512>) NormalizeFn>
static void div_normalize(benchmark::State& state)
{
auto u = uint512{{48882153453, 100324254353}, {4343242153453, 1324254353}};
Expand Down
44 changes: 12 additions & 32 deletions test/unittests/test_div.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,59 +17,39 @@ TEST(div, normalize)
EXPECT_EQ(na.shift, 63);
EXPECT_EQ(na.num_denominator_words, 1);
EXPECT_EQ(na.num_numerator_words, 0);
EXPECT_EQ(na.numerator[0], 0);
EXPECT_EQ(na.numerator[1], 0);
EXPECT_EQ(na.numerator[8], 0);
EXPECT_EQ(na.denominator[0], uint64_t{1} << 63);
EXPECT_EQ(na.denominator[1], 0);
EXPECT_EQ(na.numerator, 0);
EXPECT_EQ(na.numerator_ex, 0);
EXPECT_EQ(na.denominator, v << 63);

u = uint512{1414, 1313};
v = uint512{12, 1212};
na = normalize(u, v);
EXPECT_EQ(na.shift, 60);
EXPECT_EQ(na.num_denominator_words, 5);
EXPECT_EQ(na.num_numerator_words, 5);
EXPECT_EQ(na.numerator[0], uint64_t{1313} << 60);
EXPECT_EQ(na.numerator[1], uint64_t{1313} >> (64 - 60));
EXPECT_EQ(na.numerator[2], 0);
EXPECT_EQ(na.numerator[4], uint64_t{1414} << 60);
EXPECT_EQ(na.numerator[na.num_numerator_words], uint64_t{1414} >> (64 - 60));
EXPECT_EQ(na.denominator[0], uint64_t{1212} << 60);
EXPECT_EQ(na.denominator[1], uint64_t{1212} >> (64 - 60));
EXPECT_EQ(na.denominator[2], 0);
EXPECT_EQ(na.denominator[4], uint64_t{12} << 60);
EXPECT_EQ(na.denominator[5], 0);
EXPECT_EQ(na.numerator, u << 60);
EXPECT_EQ(na.numerator_ex, 0);
EXPECT_EQ(na.denominator, v << 60);

u = uint512{3} << 510;
v = uint256{0xffffffffffffffff, 1};
na = normalize(u, v);
EXPECT_EQ(na.shift, 0);
EXPECT_EQ(na.num_denominator_words, 3);
EXPECT_EQ(na.num_numerator_words, 8);
EXPECT_EQ(na.numerator[0], 0);
EXPECT_EQ(na.numerator[1], 0);
EXPECT_EQ(na.numerator[2], 0);
EXPECT_EQ(na.numerator[4], 0);
EXPECT_EQ(na.numerator[7], uint64_t{3} << 62);
EXPECT_EQ(na.numerator[8], 0);
EXPECT_EQ(na.denominator[0], 1);
EXPECT_EQ(na.denominator[1], 0);
EXPECT_EQ(na.denominator[2], 0xffffffffffffffff);
EXPECT_EQ(na.denominator[3], 0);
EXPECT_EQ(na.numerator, u);
EXPECT_EQ(na.numerator_ex, 0);
EXPECT_EQ(na.denominator, v);

u = uint512{7} << 509;
v = uint256{0x3fffffffffffffff, 1};
na = normalize(u, v);
EXPECT_EQ(na.shift, 2);
EXPECT_EQ(na.num_denominator_words, 3);
EXPECT_EQ(na.num_numerator_words, 8);
EXPECT_EQ(na.numerator[6], 0);
EXPECT_EQ(na.numerator[7], uint64_t{1} << 63);
EXPECT_EQ(na.numerator[8], 3);
EXPECT_EQ(na.denominator[0], 4);
EXPECT_EQ(na.denominator[1], 0);
EXPECT_EQ(na.denominator[2], 0xfffffffffffffffc);
EXPECT_EQ(na.denominator[3], 0);
EXPECT_EQ(na.numerator, u << 2);
EXPECT_EQ(na.numerator_ex, 3);
EXPECT_EQ(na.denominator, v << 2);
}

template <typename Int>
Expand Down

0 comments on commit 80c527d

Please sign in to comment.