-
Notifications
You must be signed in to change notification settings - Fork 325
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: mul with endomorphism #4538
Changes from 15 commits
87b1340
20a1a1a
4c9bcc5
834aa95
c9c1530
ee4ed46
1718e13
dd132fc
4c31fc9
5c3783e
9c08a58
9a28126
2a38c9a
f1396c9
3aa3ce2
b95502f
4313e8b
637c531
07c6c59
58693b9
a828c5b
4bfd00b
3347344
2c2e5d3
161262e
f48445f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
#pragma once | ||
#include "barretenberg/common/assert.hpp" | ||
#include "barretenberg/common/compiler_hints.hpp" | ||
#include "barretenberg/common/op_count.hpp" | ||
#include "barretenberg/numeric/random/engine.hpp" | ||
#include "barretenberg/numeric/uint128/uint128.hpp" | ||
#include "barretenberg/numeric/uint256/uint256.hpp" | ||
|
@@ -329,8 +330,29 @@ template <class Params_> struct alignas(32) field { | |
// if the modulus is a 256-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384 | ||
if constexpr (Params::modulus_3 >= 0x4000000000000000ULL) { | ||
split_into_endomorphism_scalars_384(k, k1, k2); | ||
return; | ||
} else { | ||
std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>> ret = | ||
split_into_endomorphism_scalars_no_shift(k); | ||
k1.data[0] = ret.first[0]; | ||
k1.data[1] = ret.first[1]; | ||
|
||
// TODO(AD): We should move away from this hack by adapting split_into_endomorphism_scalars_no_shift | ||
#if !defined(__clang__) && defined(__GNUC__) | ||
#pragma GCC diagnostic push | ||
#pragma GCC diagnostic ignored "-Warray-bounds" | ||
#endif | ||
k2.data[0] = ret.second[0]; // NOLINT | ||
k2.data[1] = ret.second[1]; | ||
#if !defined(__clang__) && defined(__GNUC__) | ||
#pragma GCC diagnostic pop | ||
#endif | ||
} | ||
} | ||
|
||
static std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>> split_into_endomorphism_scalars_no_shift( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is it called "no_shift"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll just call it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can specify that it works for values less than 256 bits if you want, but shift means something else, so it's a bit confusing |
||
const field& k) | ||
{ | ||
static_assert(Params::modulus_3 < 0x4000000000000000ULL); | ||
field input = k.reduce_once(); | ||
|
||
constexpr field endo_g1 = { Params::endo_g1_lo, Params::endo_g1_mid, Params::endo_g1_hi, 0 }; | ||
|
@@ -369,15 +391,14 @@ template <class Params_> struct alignas(32) field { | |
field t1 = (q2_lo - q1_lo).reduce_once(); | ||
field beta = cube_root_of_unity(); | ||
field t2 = (t1 * beta + input).reduce_once(); | ||
k2.data[0] = t1.data[0]; | ||
k2.data[1] = t1.data[1]; | ||
k1.data[0] = t2.data[0]; | ||
k1.data[1] = t2.data[1]; | ||
return { | ||
{ t2.data[0], t2.data[1] }, | ||
{ t1.data[0], t1.data[1] }, | ||
}; | ||
} | ||
|
||
static void split_into_endomorphism_scalars_384(const field& input, field& k1_out, field& k2_out) | ||
{ | ||
|
||
constexpr field minus_b1f{ | ||
Params::endo_minus_b1_lo, | ||
Params::endo_minus_b1_mid, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
#include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp" | ||
#include "barretenberg/ecc/curves/secp256k1/secp256k1.hpp" | ||
#include "barretenberg/ecc/curves/secp256r1/secp256r1.hpp" | ||
#include "barretenberg/ecc/groups/element.hpp" | ||
#include "barretenberg/serialize/test_helper.hpp" | ||
#include <fstream> | ||
|
||
|
@@ -129,3 +130,72 @@ TEST(AffineElement, Msgpack) | |
auto [actual, expected] = msgpack_roundtrip(secp256k1::g1::affine_element{ 1, 1 }); | ||
EXPECT_EQ(actual, expected); | ||
} | ||
|
||
namespace bb::group_elements { | ||
// Kludge to access mul_without_endomorphism; | ||
class TestElementPrivate { | ||
public: | ||
template <typename Element, typename Scalar> | ||
static Element mul_without_endomorphism(const Element& element, const Scalar& scalar) | ||
{ | ||
return element.mul_without_endomorphism(scalar); | ||
} | ||
template <typename Element, typename Scalar> | ||
static Element mul_with_endomorphism(const Element& element, const Scalar& scalar) | ||
{ | ||
return element.mul_with_endomorphism(scalar); | ||
} | ||
}; | ||
} // namespace bb::group_elements | ||
|
||
TEST(AffineElement, EndoMulMatchesNonEndo) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please expand the names of the tests? If a person has no context, they have to look inside the test to understand what's happening |
||
{ | ||
for (int i = 0; i < 100; i++) { | ||
auto x1 = bb::group_elements::element(grumpkin::g1::affine_element::random_element()); | ||
auto f1 = grumpkin::fr::random_element(); | ||
auto r1 = bb::group_elements::TestElementPrivate::mul_without_endomorphism(x1, f1); | ||
auto r2 = bb::group_elements::TestElementPrivate::mul_with_endomorphism(x1, f1); | ||
EXPECT_EQ(r1, r2); | ||
} | ||
} | ||
|
||
TEST(AffineElement, InfinityMul) | ||
{ | ||
auto result = grumpkin::g1::affine_element::infinity() * grumpkin::fr::random_element(); | ||
EXPECT_TRUE(result.is_point_at_infinity()); | ||
} | ||
|
||
TEST(AffineElement, BatchMulMatchesMul) | ||
{ | ||
constexpr size_t num_points = 1024; | ||
std::vector<grumpkin::g1::affine_element> affine_points; | ||
for (size_t i = 0; i < num_points; ++i) { | ||
affine_points.emplace_back(grumpkin::g1::affine_element::random_element()); | ||
} | ||
grumpkin::fr exponent = grumpkin::fr::random_element(); | ||
std::vector<grumpkin::g1::affine_element> result = | ||
grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent); | ||
size_t i = 0; | ||
for (grumpkin::g1::affine_element& el : result) { | ||
EXPECT_EQ(el, affine_points[i] * exponent); | ||
Rumata888 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
i++; | ||
} | ||
} | ||
|
||
TEST(AffineElement, InfinityBatchMul) | ||
{ | ||
constexpr size_t num_points = 1024; | ||
std::vector<grumpkin::g1::affine_element> affine_points; | ||
for (size_t i = 0; i < num_points; ++i) { | ||
affine_points.emplace_back(grumpkin::g1::affine_element::infinity()); | ||
} | ||
grumpkin::fr exponent = grumpkin::fr::random_element(); | ||
std::vector<grumpkin::g1::affine_element> result = | ||
grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent); | ||
for (grumpkin::g1::affine_element& el : result) { | ||
EXPECT_TRUE(el.is_point_at_infinity()); | ||
if (!el.is_point_at_infinity()) { | ||
break; // dont spam with errors | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
#include "barretenberg/common/thread.hpp" | ||
#include "barretenberg/ecc/groups/element.hpp" | ||
#include "element.hpp" | ||
#include <cstdint> | ||
|
||
// NOLINTBEGIN(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays) | ||
namespace bb::group_elements { | ||
|
@@ -599,96 +600,91 @@ element<Fq, Fr, T> element<Fq, Fr, T>::mul_without_endomorphism(const Fr& expone | |
const uint256_t converted_scalar(exponent); | ||
|
||
if (converted_scalar == 0) { | ||
element result{ Fq::zero(), Fq::zero(), Fq::zero() }; | ||
result.self_set_infinity(); | ||
return result; | ||
return element::infinity(); | ||
} | ||
|
||
element work_element(*this); | ||
element accumulator(*this); | ||
const uint64_t maximum_set_bit = converted_scalar.get_msb(); | ||
// This is simpler and doublings of infinity should be fast. We should think if we want to defend against the | ||
// timing leak here (if used with ECDSA it can sometimes lead to private key compromise) | ||
for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) { | ||
work_element.self_dbl(); | ||
accumulator.self_dbl(); | ||
if (converted_scalar.get_bit(i)) { | ||
work_element += *this; | ||
accumulator += *this; | ||
} | ||
} | ||
return work_element; | ||
return accumulator; | ||
} | ||
|
||
namespace detail { | ||
using EndoScalars = std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>>; | ||
template <typename Element, std::size_t NUM_ROUNDS> struct EndomorphismWnaf { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please leave comments specifying what the purpose of this class is. |
||
|
||
static constexpr size_t NUM_WNAF_BITS = 4; | ||
|
||
std::array<uint64_t, NUM_ROUNDS * 2> table; | ||
bool skew = false; | ||
bool endo_skew = false; | ||
|
||
EndomorphismWnaf(const EndoScalars& scalars) | ||
{ | ||
wnaf::fixed_wnaf(&scalars.first[0], &table[0], skew, 0, 2, NUM_WNAF_BITS); | ||
wnaf::fixed_wnaf(&scalars.second[0], &table[1], endo_skew, 0, 2, NUM_WNAF_BITS); | ||
} | ||
}; | ||
} // namespace detail | ||
|
||
template <class Fq, class Fr, class T> | ||
element<Fq, Fr, T> element<Fq, Fr, T>::mul_with_endomorphism(const Fr& exponent) const noexcept | ||
{ | ||
constexpr size_t NUM_ROUNDS = 32; | ||
const Fr converted_scalar = exponent.from_montgomery_form(); | ||
|
||
if (converted_scalar.is_zero()) { | ||
element result{ Fq::zero(), Fq::zero(), Fq::zero() }; | ||
result.self_set_infinity(); | ||
return result; | ||
if (converted_scalar.is_zero() || is_point_at_infinity()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd move it above line 640, then you avoid useless multiplication in conversion from montgomery There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. I was wondering how much we cared about optimizing infinity points vs readability, but I can move it up |
||
return element::infinity(); | ||
} | ||
static constexpr size_t LOOKUP_SIZE = 8; | ||
std::array<element, LOOKUP_SIZE> lookup_table; | ||
|
||
constexpr size_t lookup_size = 8; | ||
constexpr size_t num_rounds = 32; | ||
constexpr size_t num_wnaf_bits = 4; | ||
std::array<element, lookup_size> lookup_table; | ||
|
||
element d2 = element(*this); | ||
d2.self_dbl(); | ||
element d2 = dbl(); | ||
lookup_table[0] = element(*this); | ||
for (size_t i = 1; i < lookup_size; ++i) { | ||
for (size_t i = 1; i < LOOKUP_SIZE; ++i) { | ||
lookup_table[i] = lookup_table[i - 1] + d2; | ||
} | ||
|
||
uint64_t wnaf_table[num_rounds * 2]; | ||
Fr endo_scalar; | ||
Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT | ||
|
||
bool skew = false; | ||
bool endo_skew = false; | ||
|
||
wnaf::fixed_wnaf(&endo_scalar.data[0], &wnaf_table[0], skew, 0, 2, num_wnaf_bits); | ||
wnaf::fixed_wnaf(&endo_scalar.data[2], &wnaf_table[1], endo_skew, 0, 2, num_wnaf_bits); | ||
|
||
element work_element{ T::one_x, T::one_y, Fq::one() }; | ||
work_element.self_set_infinity(); | ||
|
||
uint64_t wnaf_entry = 0; | ||
uint64_t index = 0; | ||
bool sign = false; | ||
detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars_no_shift(converted_scalar); | ||
detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ endo_scalars }; | ||
element accumulator{ T::one_x, T::one_y, Fq::one() }; | ||
accumulator.self_set_infinity(); | ||
Fq beta = Fq::cube_root_of_unity(); | ||
|
||
for (size_t i = 0; i < num_rounds * 2; ++i) { | ||
wnaf_entry = wnaf_table[i]; | ||
index = wnaf_entry & 0x0fffffffU; | ||
sign = static_cast<bool>((wnaf_entry >> 31) & 1); | ||
for (size_t i = 0; i < NUM_ROUNDS * 2; ++i) { | ||
uint64_t wnaf_entry = wnaf.table[i]; | ||
uint64_t index = wnaf_entry & 0x0fffffffU; | ||
bool sign = static_cast<bool>((wnaf_entry >> 31) & 1); | ||
const bool is_odd = ((i & 1) == 1); | ||
auto to_add = lookup_table[static_cast<size_t>(index)]; | ||
to_add.y.self_conditional_negate(sign ^ is_odd); | ||
if (is_odd) { | ||
to_add.x *= beta; | ||
} | ||
work_element += to_add; | ||
accumulator += to_add; | ||
|
||
if (i != ((2 * num_rounds) - 1) && is_odd) { | ||
if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) { | ||
for (size_t j = 0; j < 4; ++j) { | ||
work_element.self_dbl(); | ||
accumulator.self_dbl(); | ||
} | ||
} | ||
} | ||
|
||
auto temporary = -lookup_table[0]; | ||
if (skew) { | ||
work_element += temporary; | ||
if (wnaf.skew) { | ||
accumulator += -lookup_table[0]; | ||
} | ||
|
||
temporary = { lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z }; | ||
|
||
if (endo_skew) { | ||
work_element += temporary; | ||
if (wnaf.endo_skew) { | ||
accumulator += element{ lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z }; | ||
} | ||
|
||
return work_element; | ||
return accumulator; | ||
} | ||
|
||
/** | ||
|
@@ -923,8 +919,10 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo | |
num_points, | ||
[&temp_point_vector, &lookup_table, &points](size_t start, size_t end) { | ||
for (size_t i = start; i < end; ++i) { | ||
temp_point_vector[i] = points[i]; | ||
lookup_table[0][i] = points[i]; | ||
// If the point is at infinity we fix-up the result later | ||
// To avoid 'trying to invert zero in the field' we set the point to 'one' here | ||
temp_point_vector[i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i]; | ||
lookup_table[0][i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i]; | ||
} | ||
}, | ||
/*finite_field_additions_per_iteration=*/0, | ||
|
@@ -958,9 +956,17 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo | |
uint64_t wnaf_table[num_rounds * 2]; | ||
Fr endo_scalar; | ||
|
||
// TODO(AD): We should move away from this hack by adapting split_into_endomorphism_scalars_no_shift | ||
#if !defined(__clang__) && defined(__GNUC__) | ||
#pragma GCC diagnostic push | ||
#pragma GCC diagnostic ignored "-Warray-bounds" | ||
#endif | ||
// Split single scalar into 2 scalar in endo form | ||
Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT | ||
|
||
#if !defined(__clang__) && defined(__GNUC__) | ||
#pragma GCC diagnostic pop | ||
#endif | ||
bool skew = false; | ||
bool endo_skew = false; | ||
|
||
|
@@ -1067,7 +1073,6 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo | |
num_points, | ||
[beta, &lookup_table, &temp_point_vector](size_t start, size_t end) { | ||
for (size_t i = start; i < end; ++i) { | ||
|
||
temp_point_vector[i] = lookup_table[0][i]; | ||
temp_point_vector[i].x *= beta; | ||
} | ||
|
@@ -1081,6 +1086,22 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo | |
/*sequential_copy_ops_per_iteration=*/1); | ||
batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]); | ||
} | ||
// handle points at infinity explicitly | ||
run_loop_in_parallel_if_effective( | ||
num_points, | ||
[&](size_t start, size_t end) { | ||
for (size_t i = start; i < end; ++i) { | ||
work_elements[i] = | ||
points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i]; | ||
} | ||
}, | ||
/*finite_field_additions_per_iteration=*/0, | ||
/*finite_field_multiplications_per_iteration=*/1, | ||
/*finite_field_inversions_per_iteration=*/0, | ||
/*group_element_additions_per_iteration=*/0, | ||
/*group_element_doublings_per_iteration=*/0, | ||
/*scalar_multiplications_per_iteration=*/0, | ||
/*sequential_copy_ops_per_iteration=*/1); | ||
|
||
return work_elements; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just lets us remote benchmark other presets + build folders