AztecProtocol · ludamad · Feb 12, 2024 · Feb 6, 2024 · Feb 8, 2024 · Feb 8, 2024
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -155,6 +155,5 @@
   "C_Cpp.vcpkg.enabled": false,
   "C_Cpp.default.includePath": [
     "barretenberg/cpp/src"
-  ],
-  "cmake.sourceDirectory": "/mnt/user-data/adam/aztec-packages/barretenberg/cpp",
+  ]
 }
diff --git a/barretenberg/cpp/scripts/benchmark_remote.sh b/barretenberg/cpp/scripts/benchmark_remote.sh
@@ -9,20 +9,19 @@ set -eu
 
 BENCHMARK=${1:-goblin_bench}
 COMMAND=${2:-./$BENCHMARK}
+PRESET=${3:-clang16}
+BUILD_DIR=${4:-build}
 
 # Move above script dir.
 cd $(dirname $0)/..
 
-# Create lock file
-ssh $BB_SSH_KEY $BB_SSH_INSTANCE "touch ~/BENCHMARKING_IN_PROGRESS"
-
 # Configure and build.
-cmake --preset clang16
-cmake --build --preset clang16 --target $BENCHMARK
+cmake --preset $PRESET
+cmake --build --preset $PRESET --target $BENCHMARK
 
 source scripts/_benchmark_remote_lock.sh
 
-cd build
+cd $BUILD_DIR
 scp $BB_SSH_KEY ./bin/$BENCHMARK $BB_SSH_INSTANCE:$BB_SSH_CPP_PATH/build
 ssh $BB_SSH_KEY $BB_SSH_INSTANCE \
   "cd $BB_SSH_CPP_PATH/build ; $COMMAND"
diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp
@@ -1,6 +1,7 @@
 #pragma once
 #include "barretenberg/common/assert.hpp"
 #include "barretenberg/common/compiler_hints.hpp"
+#include "barretenberg/common/op_count.hpp"
 #include "barretenberg/numeric/random/engine.hpp"
 #include "barretenberg/numeric/uint128/uint128.hpp"
 #include "barretenberg/numeric/uint256/uint256.hpp"
@@ -329,8 +330,29 @@ template <class Params_> struct alignas(32) field {
         // if the modulus is a 256-bit integer, we need to use a basis where g1, g2 have been shifted by 2^384
         if constexpr (Params::modulus_3 >= 0x4000000000000000ULL) {
             split_into_endomorphism_scalars_384(k, k1, k2);
-            return;
+        } else {
+            std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>> ret =
+                split_into_endomorphism_scalars_no_shift(k);
+            k1.data[0] = ret.first[0];
+            k1.data[1] = ret.first[1];
+
+            // TODO(AD): We should move away from this hack by adapting split_into_endomorphism_scalars_no_shift
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+            k2.data[0] = ret.second[0]; // NOLINT
+            k2.data[1] = ret.second[1];
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
         }
+    }
+
+    static std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>> split_into_endomorphism_scalars_no_shift(
+        const field& k)
+    {
+        static_assert(Params::modulus_3 < 0x4000000000000000ULL);
         field input = k.reduce_once();
 
         constexpr field endo_g1 = { Params::endo_g1_lo, Params::endo_g1_mid, Params::endo_g1_hi, 0 };
@@ -369,15 +391,14 @@ template <class Params_> struct alignas(32) field {
         field t1 = (q2_lo - q1_lo).reduce_once();
         field beta = cube_root_of_unity();
         field t2 = (t1 * beta + input).reduce_once();
-        k2.data[0] = t1.data[0];
-        k2.data[1] = t1.data[1];
-        k1.data[0] = t2.data[0];
-        k1.data[1] = t2.data[1];
+        return {
+            { t2.data[0], t2.data[1] },
+            { t1.data[0], t1.data[1] },
+        };
     }
 
     static void split_into_endomorphism_scalars_384(const field& input, field& k1_out, field& k2_out)
     {
-
         constexpr field minus_b1f{
             Params::endo_minus_b1_lo,
             Params::endo_minus_b1_mid,

diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp b/barretenberg/cpp/src/barretenberg/ecc/groups/affine_element.test.cpp
@@ -4,6 +4,7 @@
 #include "barretenberg/ecc/curves/grumpkin/grumpkin.hpp"
 #include "barretenberg/ecc/curves/secp256k1/secp256k1.hpp"
 #include "barretenberg/ecc/curves/secp256r1/secp256r1.hpp"
+#include "barretenberg/ecc/groups/element.hpp"
 #include "barretenberg/serialize/test_helper.hpp"
 #include <fstream>
 
@@ -129,3 +130,72 @@ TEST(AffineElement, Msgpack)
     auto [actual, expected] = msgpack_roundtrip(secp256k1::g1::affine_element{ 1, 1 });
     EXPECT_EQ(actual, expected);
 }
+
+namespace bb::group_elements {
+// Kludge to access mul_without_endomorphism;
+class TestElementPrivate {
+  public:
+    template <typename Element, typename Scalar>
+    static Element mul_without_endomorphism(const Element& element, const Scalar& scalar)
+    {
+        return element.mul_without_endomorphism(scalar);
+    }
+    template <typename Element, typename Scalar>
+    static Element mul_with_endomorphism(const Element& element, const Scalar& scalar)
+    {
+        return element.mul_with_endomorphism(scalar);
+    }
+};
+} // namespace bb::group_elements
+
+TEST(AffineElement, EndoMulMatchesNonEndo)
+{
+    for (int i = 0; i < 100; i++) {
+        auto x1 = bb::group_elements::element(grumpkin::g1::affine_element::random_element());
+        auto f1 = grumpkin::fr::random_element();
+        auto r1 = bb::group_elements::TestElementPrivate::mul_without_endomorphism(x1, f1);
+        auto r2 = bb::group_elements::TestElementPrivate::mul_with_endomorphism(x1, f1);
+        EXPECT_EQ(r1, r2);
+    }
+}
+
+TEST(AffineElement, InfinityMul)
+{
+    auto result = grumpkin::g1::affine_element::infinity() * grumpkin::fr::random_element();
+    EXPECT_TRUE(result.is_point_at_infinity());
+}
+
+TEST(AffineElement, BatchMulMatchesMul)
+{
+    constexpr size_t num_points = 1024;
+    std::vector<grumpkin::g1::affine_element> affine_points;
+    for (size_t i = 0; i < num_points; ++i) {
+        affine_points.emplace_back(grumpkin::g1::affine_element::random_element());
+    }
+    grumpkin::fr exponent = grumpkin::fr::random_element();
+    std::vector<grumpkin::g1::affine_element> result =
+        grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent);
+    size_t i = 0;
+    for (grumpkin::g1::affine_element& el : result) {
+        EXPECT_EQ(el, affine_points[i] * exponent);
+        i++;
+    }
+}
+
+TEST(AffineElement, InfinityBatchMul)
+{
+    constexpr size_t num_points = 1024;
+    std::vector<grumpkin::g1::affine_element> affine_points;
+    for (size_t i = 0; i < num_points; ++i) {
+        affine_points.emplace_back(grumpkin::g1::affine_element::infinity());
+    }
+    grumpkin::fr exponent = grumpkin::fr::random_element();
+    std::vector<grumpkin::g1::affine_element> result =
+        grumpkin::g1::element::batch_mul_with_endomorphism(affine_points, exponent);
+    for (grumpkin::g1::affine_element& el : result) {
+        EXPECT_TRUE(el.is_point_at_infinity());
+        if (!el.is_point_at_infinity()) {
+            break; // dont spam with errors
+        }
+    }
+}
diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp
@@ -102,6 +102,8 @@ template <class Fq, class Fr, class Params> class alignas(32) element {
     Fq z;
 
   private:
+    // For access to mul_without_endomorphism
+    friend class TestElementPrivate;
     element mul_without_endomorphism(const Fr& exponent) const noexcept;
     element mul_with_endomorphism(const Fr& exponent) const noexcept;
 

diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element_impl.hpp
@@ -3,6 +3,7 @@
 #include "barretenberg/common/thread.hpp"
 #include "barretenberg/ecc/groups/element.hpp"
 #include "element.hpp"
+#include <cstdint>
 
 // NOLINTBEGIN(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)
 namespace bb::group_elements {
@@ -599,96 +600,91 @@ element<Fq, Fr, T> element<Fq, Fr, T>::mul_without_endomorphism(const Fr& expone
     const uint256_t converted_scalar(exponent);
 
     if (converted_scalar == 0) {
-        element result{ Fq::zero(), Fq::zero(), Fq::zero() };
-        result.self_set_infinity();
-        return result;
+        return element::infinity();
     }
 
-    element work_element(*this);
+    element accumulator(*this);
     const uint64_t maximum_set_bit = converted_scalar.get_msb();
     // This is simpler and doublings of infinity should be fast. We should think if we want to defend against the
     // timing leak here (if used with ECDSA it can sometimes lead to private key compromise)
     for (uint64_t i = maximum_set_bit - 1; i < maximum_set_bit; --i) {
-        work_element.self_dbl();
+        accumulator.self_dbl();
         if (converted_scalar.get_bit(i)) {
-            work_element += *this;
+            accumulator += *this;
         }
     }
-    return work_element;
+    return accumulator;
 }
 
+namespace detail {
+using EndoScalars = std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>>;
+template <typename Element, std::size_t NUM_ROUNDS> struct EndomorphismWnaf {
+
+    static constexpr size_t NUM_WNAF_BITS = 4;
+
+    std::array<uint64_t, NUM_ROUNDS * 2> table;
+    bool skew = false;
+    bool endo_skew = false;
+
+    EndomorphismWnaf(const EndoScalars& scalars)
+    {
+        wnaf::fixed_wnaf(&scalars.first[0], &table[0], skew, 0, 2, NUM_WNAF_BITS);
+        wnaf::fixed_wnaf(&scalars.second[0], &table[1], endo_skew, 0, 2, NUM_WNAF_BITS);
+    }
+};
+} // namespace detail
+
 template <class Fq, class Fr, class T>
 element<Fq, Fr, T> element<Fq, Fr, T>::mul_with_endomorphism(const Fr& exponent) const noexcept
 {
+    constexpr size_t NUM_ROUNDS = 32;
     const Fr converted_scalar = exponent.from_montgomery_form();
 
-    if (converted_scalar.is_zero()) {
-        element result{ Fq::zero(), Fq::zero(), Fq::zero() };
-        result.self_set_infinity();
-        return result;
+    if (converted_scalar.is_zero() || is_point_at_infinity()) {
+        return element::infinity();
     }
+    static constexpr size_t LOOKUP_SIZE = 8;
+    std::array<element, LOOKUP_SIZE> lookup_table;
 
-    constexpr size_t lookup_size = 8;
-    constexpr size_t num_rounds = 32;
-    constexpr size_t num_wnaf_bits = 4;
-    std::array<element, lookup_size> lookup_table;
-
-    element d2 = element(*this);
-    d2.self_dbl();
+    element d2 = dbl();
     lookup_table[0] = element(*this);
-    for (size_t i = 1; i < lookup_size; ++i) {
+    for (size_t i = 1; i < LOOKUP_SIZE; ++i) {
         lookup_table[i] = lookup_table[i - 1] + d2;
     }
 
-    uint64_t wnaf_table[num_rounds * 2];
-    Fr endo_scalar;
-    Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT
-
-    bool skew = false;
-    bool endo_skew = false;
-
-    wnaf::fixed_wnaf(&endo_scalar.data[0], &wnaf_table[0], skew, 0, 2, num_wnaf_bits);
-    wnaf::fixed_wnaf(&endo_scalar.data[2], &wnaf_table[1], endo_skew, 0, 2, num_wnaf_bits);
-
-    element work_element{ T::one_x, T::one_y, Fq::one() };
-    work_element.self_set_infinity();
-
-    uint64_t wnaf_entry = 0;
-    uint64_t index = 0;
-    bool sign = false;
+    detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars_no_shift(converted_scalar);
+    detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ endo_scalars };
+    element accumulator{ T::one_x, T::one_y, Fq::one() };
+    accumulator.self_set_infinity();
     Fq beta = Fq::cube_root_of_unity();
 
-    for (size_t i = 0; i < num_rounds * 2; ++i) {
-        wnaf_entry = wnaf_table[i];
-        index = wnaf_entry & 0x0fffffffU;
-        sign = static_cast<bool>((wnaf_entry >> 31) & 1);
+    for (size_t i = 0; i < NUM_ROUNDS * 2; ++i) {
+        uint64_t wnaf_entry = wnaf.table[i];
+        uint64_t index = wnaf_entry & 0x0fffffffU;
+        bool sign = static_cast<bool>((wnaf_entry >> 31) & 1);
         const bool is_odd = ((i & 1) == 1);
         auto to_add = lookup_table[static_cast<size_t>(index)];
         to_add.y.self_conditional_negate(sign ^ is_odd);
         if (is_odd) {
             to_add.x *= beta;
         }
-        work_element += to_add;
+        accumulator += to_add;
 
-        if (i != ((2 * num_rounds) - 1) && is_odd) {
+        if (i != ((2 * NUM_ROUNDS) - 1) && is_odd) {
             for (size_t j = 0; j < 4; ++j) {
-                work_element.self_dbl();
+                accumulator.self_dbl();
             }
         }
     }
 
-    auto temporary = -lookup_table[0];
-    if (skew) {
-        work_element += temporary;
+    if (wnaf.skew) {
+        accumulator += -lookup_table[0];
     }
-
-    temporary = { lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z };
-
-    if (endo_skew) {
-        work_element += temporary;
+    if (wnaf.endo_skew) {
+        accumulator += element{ lookup_table[0].x * beta, lookup_table[0].y, lookup_table[0].z };
     }
 
-    return work_element;
+    return accumulator;
 }
 
 /**
@@ -923,8 +919,10 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
         num_points,
         [&temp_point_vector, &lookup_table, &points](size_t start, size_t end) {
             for (size_t i = start; i < end; ++i) {
-                temp_point_vector[i] = points[i];
-                lookup_table[0][i] = points[i];
+                // If the point is at infinity we fix-up the result later
+                // To avoid 'trying to invert zero in the field' we set the point to 'one' here
+                temp_point_vector[i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i];
+                lookup_table[0][i] = points[i].is_point_at_infinity() ? affine_element::one() : points[i];
             }
         },
         /*finite_field_additions_per_iteration=*/0,
@@ -958,9 +956,17 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
     uint64_t wnaf_table[num_rounds * 2];
     Fr endo_scalar;
 
+    // TODO(AD): We should move away from this hack by adapting split_into_endomorphism_scalars_no_shift
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
     // Split single scalar into 2 scalar in endo form
     Fr::split_into_endomorphism_scalars(converted_scalar, endo_scalar, *(Fr*)&endo_scalar.data[2]); // NOLINT
 
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
     bool skew = false;
     bool endo_skew = false;
 
@@ -1067,7 +1073,6 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
             num_points,
             [beta, &lookup_table, &temp_point_vector](size_t start, size_t end) {
                 for (size_t i = start; i < end; ++i) {
-
                     temp_point_vector[i] = lookup_table[0][i];
                     temp_point_vector[i].x *= beta;
                 }
@@ -1081,6 +1086,22 @@ std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomo
             /*sequential_copy_ops_per_iteration=*/1);
         batch_affine_add_internal(&temp_point_vector[0], &work_elements[0]);
     }
+    // handle points at infinity explicitly
+    run_loop_in_parallel_if_effective(
+        num_points,
+        [&](size_t start, size_t end) {
+            for (size_t i = start; i < end; ++i) {
+                work_elements[i] =
+                    points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
+            }
+        },
+        /*finite_field_additions_per_iteration=*/0,
+        /*finite_field_multiplications_per_iteration=*/1,
+        /*finite_field_inversions_per_iteration=*/0,
+        /*group_element_additions_per_iteration=*/0,
+        /*group_element_doublings_per_iteration=*/0,
+        /*scalar_multiplications_per_iteration=*/0,
+        /*sequential_copy_ops_per_iteration=*/1);
 
     return work_elements;
 }