Skip to content

Commit

Permalink
Add Solinas reduction for P-192
Browse files Browse the repository at this point in the history
Only faster on 32-bit, it seems. On x86-32 provides a ~20% speedup
  • Loading branch information
randombit committed Jul 9, 2024
1 parent 72fb71b commit 390a9f2
Showing 1 changed file with 88 additions and 0 deletions.
88 changes: 88 additions & 0 deletions src/lib/math/pcurves/pcurves_secp192r1/pcurves_secp192r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <botan/internal/pcurves_instance.h>

#include <botan/internal/pcurves_solinas.h>
#include <botan/internal/pcurves_wrap.h>

namespace Botan::PCurve {
Expand All @@ -14,6 +15,87 @@ namespace {

namespace secp192r1 {

// Field element representation for secp192r1 (NIST P-192) which reduces
// double-width values using a Solinas-style reduction specific to this prime.
//
// Elements are kept as plain integers: to_rep() and from_rep() return their
// argument unchanged and one() is the integer 1.
//
// Params supplies the prime P, the number of words N and the word type W.
template <typename Params>
class Secp192r1Rep final {
public:
static constexpr auto P = Params::P;
static constexpr size_t N = Params::N;
typedef typename Params::W W;

// Reduce the double-width value z modulo P and return the N word result
//
// The reduction relies on 2**192 = 2**64 + 1 (mod P), which holds for
// P = 2**192 - 2**64 - 1. Viewing z as twelve 32-bit limbs X00..X11, the
// upper limbs are folded onto the lower six columns:
//   X06, X07 (weight 2**192) -> columns 0,1 and 2,3
//   X08, X09 (weight 2**256) -> columns 2,3 and 4,5
//   X10, X11 (weight 2**320) -> columns 0,1 and 2,3 and 4,5
constexpr static std::array<W, N> redc(const std::array<W, 2 * N>& z) {
// NOTE(review): get_uint32 is not visible here; presumably it returns the
// i-th 32-bit limb of z, least significant first. The limbs are widened to
// int64_t so that the column sums below cannot overflow.
const int64_t X00 = get_uint32(z.data(), 0);
const int64_t X01 = get_uint32(z.data(), 1);
const int64_t X02 = get_uint32(z.data(), 2);
const int64_t X03 = get_uint32(z.data(), 3);
const int64_t X04 = get_uint32(z.data(), 4);
const int64_t X05 = get_uint32(z.data(), 5);
const int64_t X06 = get_uint32(z.data(), 6);
const int64_t X07 = get_uint32(z.data(), 7);
const int64_t X08 = get_uint32(z.data(), 8);
const int64_t X09 = get_uint32(z.data(), 9);
const int64_t X10 = get_uint32(z.data(), 10);
const int64_t X11 = get_uint32(z.data(), 11);

// Column sums; Sk collects every limb that contributes at weight 2**(32*k).
// All terms are added, so each sum is non-negative.
const int64_t S0 = X00 + X06 + X10;
const int64_t S1 = X01 + X07 + X11;
const int64_t S2 = X02 + X06 + X08 + X10;
const int64_t S3 = X03 + X07 + X09 + X11;
const int64_t S4 = X04 + X08 + X10;
const int64_t S5 = X05 + X09 + X11;

std::array<W, N> r = {};

// NOTE(review): SolinasAccum is not visible here; presumably each accum()
// call stores the low 32 bits of the running sum into the next limb of r
// and carries the remainder into the following column, so the calls must
// stay in column order - confirm in pcurves_solinas.h.
SolinasAccum sum(r);

sum.accum(S0);
sum.accum(S1);
sum.accum(S2);
sum.accum(S3);
sum.accum(S4);
sum.accum(S5);
// S is presumably the carry out of the top column, that is the folded value
// equals S*2**192 + r
const auto S = sum.final_carry(0);

BOTAN_DEBUG_ASSERT(S <= 3);

// The helper below yields the low 192 bits of (S+1)*P. Subtracting that from
// r computes (S*2**192 + r) - (S+1)*P modulo 2**192; a borrow indicates that
// this difference is negative.
const auto correction = p192_mul_mod_192(S);
W borrow = bigint_sub2(r.data(), N, correction.data(), N);

// If one multiple of P too many was subtracted, add P back. Presumably
// bigint_cnd_add performs the addition only when borrow is set (helper not
// visible here).
bigint_cnd_add(borrow, r.data(), N, P.data(), N);

return r;
}

// The multiplicative identity; only the first word is set to 1
constexpr static std::array<W, N> one() { return std::array<W, N>{1}; }

// Convert into the internal representation (identity for this class)
constexpr static std::array<W, N> to_rep(const std::array<W, N>& x) { return x; }

// Convert a double-width value into the internal representation by reducing it
constexpr static std::array<W, N> wide_to_rep(const std::array<W, 2 * N>& x) { return redc(x); }

// Convert out of the internal representation (identity for this class)
constexpr static std::array<W, N> from_rep(const std::array<W, N>& z) { return z; }

private:
// Return ((i+1)*P-192) % 2**192
//
// Starting from P, i is subtracted from the word at bit 64 and from the word
// at bit 0, giving P - i*(2**64 + 1) = 2**192 - (i+1)*(2**64 + 1). Note that
// for i == 0 the result is P itself, not zero.
//
// Assumes i is small, so that neither word subtraction underflows, and that
// P is stored least significant word first
constexpr static std::array<W, N> p192_mul_mod_192(W i) {
static_assert(WordInfo<W>::bits == 32 || WordInfo<W>::bits == 64);

// For small i, multiples of P-192 have a simple structure so it's faster to
// compute the value directly vs a (constant time) table lookup

auto r = P;

if constexpr(WordInfo<W>::bits == 32) {
// With 32-bit words bit 64 is the start of word 2
r[2] -= i;
r[0] -= i;
} else {
// With 64-bit words bit 64 is the start of word 1
r[1] -= i;
r[0] -= i;
}
return r;
}
};

// clang-format off
class Params final : public EllipticCurveParameters<
"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFF",
Expand All @@ -26,7 +108,13 @@ class Params final : public EllipticCurveParameters<

// clang-format on

// Choose the field representation by word size. The Solinas based
// Secp192r1Rep is enabled only for 32 bit words; it also works for 64 bit
// words, but there it is at best marginally faster (at least on the
// compilers/CPUs tested so far), so the default representation is kept.
#if BOTAN_MP_WORD_BITS != 32
class Curve final : public EllipticCurve<Params> {};
#else
class Curve final : public EllipticCurve<Params, Secp192r1Rep> {};
#endif

} // namespace secp192r1

Expand Down

0 comments on commit 390a9f2

Please sign in to comment.