From 0cef41333ee5020c553218c64f1abd210cb3a6ca Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 14:08:55 +0200 Subject: [PATCH 01/18] Sync approaches for G2 and G1. --- src/ep/relic_ep_mul.c | 2 -- src/epx/relic_ep2_mul.c | 40 ++++++++++++++++++++++++++----------- src/epx/relic_ep2_mul_sim.c | 26 +++++++++--------------- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/ep/relic_ep_mul.c b/src/ep/relic_ep_mul.c index c79cf404d..5e554cfc5 100644 --- a/src/ep/relic_ep_mul.c +++ b/src/ep/relic_ep_mul.c @@ -77,8 +77,6 @@ static void ep_mul_glv_imp(ep_t r, const ep_t p, const bn_t k) { bn_rec_glv(k0, k1, _k, n, (const bn_t *)v1, (const bn_t *)v2); s0 = bn_sign(k0); s1 = bn_sign(k1); - bn_abs(k0, k0); - bn_abs(k1, k1); if (s0 == RLC_POS) { ep_tab(t, p, RLC_WIDTH); diff --git a/src/epx/relic_ep2_mul.c b/src/epx/relic_ep2_mul.c index 2e2386cbf..3e9e95bb8 100644 --- a/src/epx/relic_ep2_mul.c +++ b/src/epx/relic_ep2_mul.c @@ -44,7 +44,7 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { size_t l, _l[4]; bn_t n, _k[4], u; int8_t naf[4][RLC_FP_BITS + 1]; - ep2_t q[4]; + ep2_t q[4], t[4][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); @@ -52,11 +52,15 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { bn_null(_k[i]); ep2_null(q[i]); bn_new(_k[i]); ep2_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep2_null(t[i][j]); + ep2_new(t[i][j]); + } } ep2_curve_get_ord(n); @@ -70,25 +74,35 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { ep2_frb(q[3], q[2], 1); l = 0; - for (int i = 0; i < 4; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep2_neg(q[i], q[i]); - } + for (size_t i = 0; i < 4; i++) { _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); + if (i == 0) { + if (bn_sign(_k[0]) == 
RLC_NEG) { + ep2_neg(q[0], q[0]); + } + ep2_tab(t[0], q[0], RLC_WIDTH); + } else { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep2_frb(t[i][j], t[i - 1][j], 1); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep2_neg(t[i][j], t[i][j]); + } + } + } } ep2_set_infty(r); for (int j = l - 1; j >= 0; j--) { ep2_dbl(r, r); - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { if (naf[i][j] > 0) { - ep2_add(r, r, q[i]); + ep2_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep2_sub(r, r, q[i]); + ep2_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -102,11 +116,13 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { bn_free(_k[i]); ep2_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep2_free(t[i][j]); + } } - } } diff --git a/src/epx/relic_ep2_mul_sim.c b/src/epx/relic_ep2_mul_sim.c index c39c432f6..d163c42bd 100644 --- a/src/epx/relic_ep2_mul_sim.c +++ b/src/epx/relic_ep2_mul_sim.c @@ -280,9 +280,7 @@ void ep2_mul_sim_basic(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, const bn_t m) { - ep2_t t0[1 << (RLC_WIDTH / 2)]; - ep2_t t1[1 << (RLC_WIDTH / 2)]; - ep2_t t[1 << RLC_WIDTH]; + ep2_t t0[1 << (RLC_WIDTH / 2)], t1[1 << (RLC_WIDTH / 2)], t[1 << RLC_WIDTH]; bn_t n, _k, _m; size_t l0, l1, w = RLC_WIDTH / 2; uint8_t w0[2 * RLC_FP_BITS], w1[2 * RLC_FP_BITS]; @@ -305,10 +303,6 @@ void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, bn_new(_k); bn_new(_m); - ep2_curve_get_ord(n); - bn_mod(_k, k, n); - bn_mod(_m, m, n); - for (int i = 0; i < (1 << w); i++) { ep2_null(t0[i]); ep2_null(t1[i]); @@ -320,21 +314,19 @@ void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, ep2_new(t[i]); } + ep2_curve_get_ord(n); + bn_mod(_k, k, n); + bn_mod(_m, m, n); + ep2_set_infty(t0[0]); ep2_copy(t0[1], 
p); - if (bn_sign(k) == RLC_NEG) { - ep2_neg(t0[1], t0[1]); - } for (int i = 2; i < (1 << w); i++) { ep2_add(t0[i], t0[i - 1], t0[1]); } ep2_set_infty(t1[0]); ep2_copy(t1[1], q); - if (bn_sign(m) == RLC_NEG) { - ep2_neg(t1[1], t1[1]); - } - for (int i = 1; i < (1 << w); i++) { + for (int i = 2; i < (1 << w); i++) { ep2_add(t1[i], t1[i - 1], t1[1]); } @@ -345,12 +337,12 @@ void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, } #if defined(EP_MIXED) - ep2_norm_sim(t + 1, t + 1, (1 << (RLC_WIDTH)) - 1); + ep2_norm_sim(t + 2, (const ep2_t *)(t + 2), (1 << (w + w)) - 2); #endif l0 = l1 = RLC_CEIL(2 * RLC_FP_BITS, w); - bn_rec_win(w0, &l0, k, w); - bn_rec_win(w1, &l1, m, w); + bn_rec_win(w0, &l0, _k, w); + bn_rec_win(w1, &l1, _m, w); ep2_set_infty(r); for (int i = RLC_MAX(l0, l1) - 1; i >= 0; i--) { From 8b7786fabc043ece51052b9c3c512a47b3b213ea Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 14:36:34 +0200 Subject: [PATCH 02/18] Simplify code. --- src/epx/relic_ep2_mul.c | 18 ++-- src/pc/relic_pc_exp.c | 180 ++++++++++++++++++++-------------------- 2 files changed, 98 insertions(+), 100 deletions(-) diff --git a/src/epx/relic_ep2_mul.c b/src/epx/relic_ep2_mul.c index 3e9e95bb8..5533c5cb9 100644 --- a/src/epx/relic_ep2_mul.c +++ b/src/epx/relic_ep2_mul.c @@ -44,19 +44,19 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { size_t l, _l[4]; bn_t n, _k[4], u; int8_t naf[4][RLC_FP_BITS + 1]; - ep2_t q[4], t[4][1 << (RLC_WIDTH - 2)]; + ep2_t q, t[4][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep2_null(q); RLC_TRY { bn_new(n); bn_new(u); + ep2_new(q); for (size_t i = 0; i < 4; i++) { bn_null(_k[i]); - ep2_null(q[i]); bn_new(_k[i]); - ep2_new(q[i]); for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { ep2_null(t[i][j]); ep2_new(t[i][j]); @@ -68,21 +68,17 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 4, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - 
ep2_norm(q[0], p); - ep2_frb(q[1], q[0], 1); - ep2_frb(q[2], q[1], 1); - ep2_frb(q[3], q[2], 1); - l = 0; for (size_t i = 0; i < 4; i++) { _l[i] = RLC_FP_BITS + 1; bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); if (i == 0) { + ep2_norm(q, p); if (bn_sign(_k[0]) == RLC_NEG) { - ep2_neg(q[0], q[0]); + ep2_neg(q, q); } - ep2_tab(t[0], q[0], RLC_WIDTH); + ep2_tab(t[0], q, RLC_WIDTH); } else { for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { ep2_frb(t[i][j], t[i - 1][j], 1); @@ -116,9 +112,9 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); + ep2_free(q); for (size_t i = 0; i < 4; i++) { bn_free(_k[i]); - ep2_free(q[i]); for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { ep2_free(t[i][j]); } diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 05d854385..22fc69fbf 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -114,21 +114,20 @@ static void gt_psi(gt_t c, const gt_t a) { * @param[in] b - the exponent. * @param[in] f - the maximum Frobenius power. 
*/ -void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { - int8_t c0, n0, *reg = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); - int8_t *e = RLC_ALLOCA(int8_t, f), *s = RLC_ALLOCA(int8_t, f); - gt_t q, w, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); +void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { + int8_t *naf = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); + int8_t n0, *s = RLC_ALLOCA(int8_t, f); + gt_t q, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); bn_t n, u, *_b = RLC_ALLOCA(bn_t, f); - size_t l, len, *_l = RLC_ALLOCA(size_t, f); + size_t l, *_l = RLC_ALLOCA(size_t, f); - if (reg == NULL || e == NULL || t == NULL || _b == NULL || _l == NULL) { + if (naf == NULL || t == NULL || _b == NULL || _l == NULL) { RLC_THROW(ERR_NO_MEMORY); return; } if (bn_is_zero(b)) { - RLC_FREE(reg); - RLC_FREE(e); + RLC_FREE(naf); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -139,13 +138,11 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_null(n); bn_null(u); gt_null(q); - gt_null(w); RLC_TRY { bn_new(n); bn_new(u); gt_new(q); - gt_new(w); for (size_t i = 0; i < f; i++) { bn_null(_b[i]); bn_new(_b[i]); @@ -171,16 +168,11 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); l = 0; - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); gt_copy(t[0], a); for (size_t i = 0; i < f; i++) { s[i] = bn_sign(_b[i]); - bn_abs(_b[i], _b[i]); - e[i] = bn_is_even(_b[i]); - _b[i]->dp[0] |= e[i]; - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg + i * (RLC_FP_BITS + 1), &_l[i], _b[i], len, RLC_WIDTH); + bn_rec_naf(naf + i * (RLC_FP_BITS + 1), &_l[i], _b[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); /* Apply Frobenius before flipping sign to build table. 
*/ if (i > 0) { @@ -188,48 +180,43 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { } } - for (size_t i = 0; i < f; i++) { - gt_inv(q, t[i * RLC_GT_TABLE]); - gt_copy_sec(q, t[i * RLC_GT_TABLE], s[i] == RLC_POS); - if (RLC_WIDTH > 2) { - gt_sqr(t[i * RLC_GT_TABLE], q); - gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); - for (size_t j = 2; j < RLC_GT_TABLE; j++) { - gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], - t[i * (RLC_GT_TABLE)]); + gt_copy(q, a); + if (s[0] == RLC_NEG) { + gt_inv(q, q); + } + if (RLC_WIDTH > 2) { + gt_sqr(t[0], q); + gt_mul(t[1], t[0], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[j], t[j - 1], t[0]); + } + } + gt_copy(t[0], q); + for (size_t i = 1; i < f; i++) { + for (size_t j = 0; j < RLC_GT_TABLE; j++) { + gt_frb(t[i * RLC_GT_TABLE + j], + t[(i - 1) * RLC_GT_TABLE + j], 1); + if (s[i] != s[i - 1]) { + gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); } } - gt_copy(t[i * RLC_GT_TABLE], q); } gt_set_unity(c); for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - gt_sqr(c, c); - } + gt_sqr(c, c); for (size_t i = 0; i < f; i++) { - n0 = reg[i * (RLC_FP_BITS + 1) + j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < RLC_GT_TABLE; m++) { - gt_copy_sec(w, t[i * RLC_GT_TABLE + m], m == n0); + n0 = naf[i * (RLC_FP_BITS + 1) + j]; + if (n0 > 0) { + gt_mul(c, c, t[i * RLC_GT_TABLE + n0 / 2]); + } + if (n0 < 0) { + gt_inv(q, t[i * RLC_GT_TABLE - n0 / 2]); + gt_mul(c, c, q); } - - gt_inv(q, w); - gt_copy_sec(q, w, c0 == 0); - gt_mul(c, c, q); - } } - - for (size_t i = 0; i < f; i++) { - /* Tables are built with points already negated, so no need here. 
*/ - gt_inv(q, t[i * RLC_GT_TABLE]); - gt_mul(q, c, q); - gt_copy_sec(c, q, e[i]); - } } RLC_CATCH_ANY { RLC_THROW(ERR_CAUGHT); @@ -238,15 +225,13 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_free(n); bn_free(u); gt_free(q); - gt_free(w); for (size_t i = 0; i < f; i++) { bn_free(_b[i]); for (size_t j = 0; j < RLC_GT_TABLE; j++) { gt_free(t[i * RLC_GT_TABLE + j]); } } - RLC_FREE(reg); - RLC_FREE(e); + RLC_FREE(naf); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -254,11 +239,6 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { } } -/** - * Size of a precomputation table using the double-table comb method. - */ -#define RLC_GT_TABLE (1 << (RLC_WIDTH - 2)) - /** * Exponentiates an element from G_T in constant time. * @@ -267,20 +247,21 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { * @param[in] b - the exponent. * @param[in] f - the maximum Frobenius power. */ -void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { - int8_t *naf = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); - int8_t n0, *s = RLC_ALLOCA(int8_t, f); - gt_t q, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); +void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { + int8_t c0, n0, *reg = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); + int8_t *e = RLC_ALLOCA(int8_t, f), *s = RLC_ALLOCA(int8_t, f); + gt_t q, w, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); bn_t n, u, *_b = RLC_ALLOCA(bn_t, f); - size_t l, *_l = RLC_ALLOCA(size_t, f); + size_t l, len, *_l = RLC_ALLOCA(size_t, f); - if (naf == NULL || t == NULL || _b == NULL || _l == NULL) { + if (reg == NULL || e == NULL || t == NULL || _b == NULL || _l == NULL) { RLC_THROW(ERR_NO_MEMORY); return; } if (bn_is_zero(b)) { - RLC_FREE(naf); + RLC_FREE(reg); + RLC_FREE(e); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -291,11 +272,13 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_null(n); bn_null(u); gt_null(q); + gt_null(w); RLC_TRY { bn_new(n); bn_new(u); gt_new(q); + 
gt_new(w); for (size_t i = 0; i < f; i++) { bn_null(_b[i]); bn_new(_b[i]); @@ -321,49 +304,66 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); l = 0; - gt_copy(t[0], a); + len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); for (size_t i = 0; i < f; i++) { s[i] = bn_sign(_b[i]); - bn_abs(_b[i], _b[i]); + e[i] = bn_is_even(_b[i]); + _b[i]->dp[0] |= e[i]; _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf + i * (RLC_FP_BITS + 1), &_l[i], _b[i], RLC_WIDTH); + bn_rec_reg(reg + i * (RLC_FP_BITS + 1), &_l[i], _b[i], len, RLC_WIDTH); l = RLC_MAX(l, _l[i]); - /* Apply Frobenius before flipping sign to build table. */ - if (i > 0) { - gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); - } } - for (size_t i = 0; i < f; i++) { - gt_inv(q, t[i * RLC_GT_TABLE]); - gt_copy_sec(q, t[i * RLC_GT_TABLE], s[i] == RLC_POS); - if (RLC_WIDTH > 2) { - gt_sqr(t[i * RLC_GT_TABLE], q); - gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); - for (size_t j = 2; j < RLC_GT_TABLE; j++) { - gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], - t[i * (RLC_GT_TABLE)]); + gt_copy(t[0], a); + gt_inv(q, t[0]); + gt_copy_sec(q, t[0], s[0] == RLC_POS); + if (RLC_WIDTH > 2) { + gt_sqr(t[0], q); + gt_mul(t[1], t[0], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[j], t[j - 1], t[0]); + } + } + gt_copy(t[0], q); + for (size_t i = 1; i < f; i++) { + for (size_t j = 0; j < RLC_GT_TABLE; j++) { + gt_frb(t[i * RLC_GT_TABLE + j], + t[(i - 1) * RLC_GT_TABLE + j], 1); + if (s[i] != s[i - 1]) { + gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); } } - gt_copy(t[i * RLC_GT_TABLE], q); } gt_set_unity(c); for (int j = l - 1; j >= 0; j--) { - gt_sqr(c, c); + for (size_t i = 0; i < RLC_WIDTH - 1; i++) { + gt_sqr(c, c); + } for (size_t i = 0; i < f; i++) { - n0 = naf[i * (RLC_FP_BITS + 1) + j]; - if (n0 > 0) { - gt_mul(c, c, t[i * RLC_GT_TABLE + n0 / 2]); - } - if (n0 < 0) { - gt_inv(q, t[i * RLC_GT_TABLE - 
n0 / 2]); - gt_mul(c, c, q); + n0 = reg[i * (RLC_FP_BITS + 1) + j]; + c0 = (n0 >> 7); + n0 = ((n0 ^ c0) - c0) >> 1; + + for (size_t m = 0; m < RLC_GT_TABLE; m++) { + gt_copy_sec(w, t[i * RLC_GT_TABLE + m], m == n0); } + + gt_inv(q, w); + gt_copy_sec(q, w, c0 == 0); + gt_mul(c, c, q); + } } + + for (size_t i = 0; i < f; i++) { + /* Tables are built with points already negated, so no need here. */ + gt_inv(q, t[i * RLC_GT_TABLE]); + gt_mul(q, c, q); + gt_copy_sec(c, q, e[i]); + } } RLC_CATCH_ANY { RLC_THROW(ERR_CAUGHT); @@ -372,13 +372,15 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_free(n); bn_free(u); gt_free(q); + gt_free(w); for (size_t i = 0; i < f; i++) { bn_free(_b[i]); for (size_t j = 0; j < RLC_GT_TABLE; j++) { gt_free(t[i * RLC_GT_TABLE + j]); } } - RLC_FREE(naf); + RLC_FREE(reg); + RLC_FREE(e); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -522,7 +524,7 @@ void gt_exp_sec(gt_t c, const gt_t a, const bn_t b) { } #if FP_PRIME <= 1536 - gt_exp_imp(c, a, b, ep_curve_frdim()); + gt_exp_reg_gls(c, a, b, ep_curve_frdim()); #else RLC_CAT(RLC_GT_LOWER, exp_monty)(c, a, b); #endif From ef64ae76c1a239560567383a0a0c186888fca390 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 15:09:08 +0200 Subject: [PATCH 03/18] Fix build error. --- src/low/x64-asm-8l/relic_bn_mul_low.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/low/x64-asm-8l/relic_bn_mul_low.c b/src/low/x64-asm-8l/relic_bn_mul_low.c index bacd3d645..0839f9010 100644 --- a/src/low/x64-asm-8l/relic_bn_mul_low.c +++ b/src/low/x64-asm-8l/relic_bn_mul_low.c @@ -56,5 +56,5 @@ void bn_muld_low(dig_t *c, const dig_t *a, size_t sa, const dig_t *b, size_t sb, int low, int high) { (void)low; (void)high; - mpn_mul(c, a, sizea, b, sizeb); + mpn_mul(c, a, sa, b, sb); } From 25fc38ac383ffe7addb07fffc9973e45f8396671 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 15:12:11 +0200 Subject: [PATCH 04/18] Remove compile warning. 
--- src/pc/relic_pc_exp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 22fc69fbf..7b22a5648 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -181,7 +181,7 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { } gt_copy(q, a); - if (s[0] == RLC_NEG) { + if (bn_sign(_b[0]) == RLC_NEG) { gt_inv(q, q); } if (RLC_WIDTH > 2) { From 1c86c8f9df03fe4ef54cde8b652304d12cc0a74c Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 15:12:59 +0200 Subject: [PATCH 05/18] Remove another warning. --- src/pc/relic_pc_exp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 7b22a5648..0eeadb35d 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -317,7 +317,7 @@ void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { gt_copy(t[0], a); gt_inv(q, t[0]); - gt_copy_sec(q, t[0], s[0] == RLC_POS); + gt_copy_sec(q, t[0], bn_sign(_b[0]) == RLC_POS); if (RLC_WIDTH > 2) { gt_sqr(t[0], q); gt_mul(t[1], t[0], q); From 5700c57e08cce350bf0638d79082466077576691 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 15:22:22 +0200 Subject: [PATCH 06/18] Fix for k=16. 
--- src/pc/relic_pc_exp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 0eeadb35d..c86980020 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -194,8 +194,7 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { gt_copy(t[0], q); for (size_t i = 1; i < f; i++) { for (size_t j = 0; j < RLC_GT_TABLE; j++) { - gt_frb(t[i * RLC_GT_TABLE + j], - t[(i - 1) * RLC_GT_TABLE + j], 1); + gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); if (s[i] != s[i - 1]) { gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); } @@ -328,8 +327,7 @@ void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { gt_copy(t[0], q); for (size_t i = 1; i < f; i++) { for (size_t j = 0; j < RLC_GT_TABLE; j++) { - gt_frb(t[i * RLC_GT_TABLE + j], - t[(i - 1) * RLC_GT_TABLE + j], 1); + gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); if (s[i] != s[i - 1]) { gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); } From c8f97316a0f57f554feae4b489eb4787753d827a Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 16:22:29 +0200 Subject: [PATCH 07/18] Implement GLV-SAC for E(Fp3). 
--- bench/bench_pc.c | 6 ++ src/epx/relic_ep3_mul.c | 146 +++++++++++++++++++++------------------- src/pc/relic_pc_exp.c | 4 -- 3 files changed, 83 insertions(+), 73 deletions(-) diff --git a/bench/bench_pc.c b/bench/bench_pc.c index bb040c6f4..0ac430d47 100755 --- a/bench/bench_pc.c +++ b/bench/bench_pc.c @@ -684,6 +684,12 @@ static void arith(void) { } BENCH_END; + BENCH_RUN("gt_frb (1)") { + gt_rand(a); + BENCH_ADD(gt_frb(c, a, 1)); + } + BENCH_END; + BENCH_RUN("gt_exp") { gt_rand(a); pc_get_ord(d); diff --git a/src/epx/relic_ep3_mul.c b/src/epx/relic_ep3_mul.c index a814664fb..b7d6cb37c 100644 --- a/src/epx/relic_ep3_mul.c +++ b/src/epx/relic_ep3_mul.c @@ -85,7 +85,7 @@ static void ep3_psi(ep3_t r, const ep3_t p) { #if EP_MUL == LWNAF || !defined(STRIP) -static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { +static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { int i, j; size_t l, _l[6]; bn_t n, _k[6], u; @@ -134,6 +134,7 @@ static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { l = RLC_MAX(l, _l[i]); } + /* We use w = 2 for the NAF because of the expensive endomomorphisms. 
*/ ep3_set_infty(r); for (j = l - 1; j >= 0; j--) { ep3_dbl(r, r); @@ -170,100 +171,108 @@ static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { #if EP_MUL == LWREG || !defined(STRIP) static void ep3_mul_reg_gls(ep3_t r, const ep3_t p, const bn_t k) { - int8_t reg[6][RLC_FP_BITS + 1], b[6], s[6], c0, n0; - ep3_t q, w, t[6][1 << (RLC_WIDTH - 2)]; + size_t l; bn_t n, _k[6], u; - size_t l, len, _l[6]; + int8_t even, col, sac[6 * (RLC_FP_BITS + 1)]; + ep3_t q[6], t[1 << 5]; bn_null(n); bn_null(u); - ep3_null(q); - ep3_null(w); RLC_TRY { bn_new(n); bn_new(u); - ep3_new(q); - ep3_new(w); - for (size_t i = 0; i < 6; i++) { + for (int i = 0; i < 6; i++) { bn_null(_k[i]); + ep3_null(q[i]); bn_new(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { - ep3_null(t[i][j]); - ep3_new(t[i][j]); - } + ep3_new(q[i]); + } + for (int i = 0; i < (1 << 5); i++) { + ep3_null(t[i]); + ep3_new(t[i]); } ep3_curve_get_ord(n); fp_prime_get_par(u); + if (ep_curve_is_pairf() == EP_SG18) { + /* Compute base -3*u for the recoding below. */ + bn_dbl(n, u); + bn_add(u, u, n); + bn_neg(u, u); + } bn_mod(_k[0], k, n); bn_rec_frb(_k, 6, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - - l = 0; - /* Make some extra room for BN curves that grow subscalars by 1. */ - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); - ep3_norm(t[0][0], p); + ep3_norm(q[0], p); for (size_t i = 0; i < 6; i++) { - s[i] = bn_sign(_k[i]); - bn_abs(_k[i], _k[i]); - b[i] = bn_is_even(_k[i]); - _k[i]->dp[0] |= b[i]; - - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg[i], &_l[i], _k[i], len, RLC_WIDTH); - l = RLC_MAX(l, _l[i]); - - /* Apply Frobenius before flipping sign to build table. 
*/ + ep3_neg(r, q[i]); + fp3_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); + _k[i]->sign = RLC_POS; if (i > 0) { - ep3_psi(t[i][0], t[i - 1][0]); + ep3_psi(q[i], q[i - 1]); } } + even = bn_is_even(_k[0]); + bn_add_dig(_k[0], _k[0], even); - for (size_t i = 0; i < 6; i++) { - ep3_neg(q, t[i][0]); - fp3_copy_sec(q->y, t[i][0]->y, s[i] == RLC_POS); - ep3_tab(t[i], q, RLC_WIDTH); + ep3_copy(t[0], q[0]); + for (size_t i = 1; i < (1 << 5); i++) { + l = util_bits_dig(i); + ep3_add(t[i], t[i ^ (1 << (l - 1))], q[l]); } + l = RLC_FP_BITS + 1; + bn_rec_sac(sac, &l, _k, 6, n); + #if defined(EP_MIXED) - fp3_set_dig(w->z, 1); - w->coord = BASIC; + ep3_norm_sim(t + 1, t + 1, (1 << 5) - 1); + fp3_set_dig(r->z, 1); + fp3_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; #else - w->coord = = EP_ADD; + r->coord = q[1]->coord = EP_ADD; #endif - ep3_set_infty(r); - for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - ep3_dbl(r, r); - } + col = 0; + for (int i = 5; i > 0; i--) { + col <<= 1; + col += sac[i * l + l - 1]; + } + for (size_t m = 0; m < (1 << 5); m++) { + fp3_copy_sec(r->x, t[m]->x, m == col); + fp3_copy_sec(r->y, t[m]->y, m == col); +#if !defined(EP_MIXED) + fp3_copy_sec(r->z, t[m]->z, m == col); +#endif + } - for (size_t i = 0; i < 6; i++) { - n0 = reg[i][j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < (1 << (RLC_WIDTH - 2)); m++) { - fp3_copy_sec(w->x, t[i][m]->x, m == n0); - fp3_copy_sec(w->y, t[i][m]->y, m == n0); - #if !defined(EP_MIXED) - fp3_copy_sec(w->z, t[i][m]->z, m == n0); - #endif - } + ep3_neg(q[1], r); + fp3_copy_sec(r->y, q[1]->y, sac[l - 1] != 0); + for (int j = l - 2; j >= 0; j--) { + ep3_dbl(r, r); - ep3_neg(q, w); - fp3_copy_sec(q->y, w->y, c0 == 0); - ep3_add(r, r, q); + col = 0; + for (int i = 5; i > 0; i--) { + col <<= 1; + col += sac[i * l + j]; } + + for (size_t m = 0; m < (1 << 5); m++) { + fp3_copy_sec(q[1]->x, t[m]->x, m == col); + fp3_copy_sec(q[1]->y, t[m]->y, m == col); 
+#if !defined(EP_MIXED) + fp3_copy_sec(q[1]->z, t[m]->z, m == col); +#endif + } + ep3_neg(q[2], q[1]); + fp3_copy_sec(q[1]->y, q[2]->y, sac[j]); + ep3_add(r, r, q[1]); } - for (size_t i = 0; i < 6; i++) { - /* Tables are built with points already negated, so no need here. */ - ep3_sub(q, r, t[i][0]); - fp3_copy_sec(r->x, q->x, b[i]); - fp3_copy_sec(r->y, q->y, b[i]); - fp3_copy_sec(r->z, q->z, b[i]); - } + ep3_sub(q[1], r, q[0]); + fp3_copy_sec(r->x, q[1]->x, even); + fp3_copy_sec(r->y, q[1]->y, even); + fp3_copy_sec(r->z, q[1]->z, even); /* Convert r to affine coordinates. */ ep3_norm(r, r); @@ -274,13 +283,12 @@ static void ep3_mul_reg_gls(ep3_t r, const ep3_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - ep3_free(q); - ep3_free(w); for (int i = 0; i < 6; i++) { bn_free(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { - ep3_free(t[i][j]); - } + ep3_free(q[i]); + } + for (int i = 0; i < (1 << 5); i++) { + ep3_free(t[i]); } } } @@ -652,7 +660,7 @@ void ep3_mul_lwnaf(ep3_t r, const ep3_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep3_mul_glv_imp(r, p, k); + ep3_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index c86980020..d15208286 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -174,10 +174,6 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { _l[i] = RLC_FP_BITS + 1; bn_rec_naf(naf + i * (RLC_FP_BITS + 1), &_l[i], _b[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); - /* Apply Frobenius before flipping sign to build table. */ - if (i > 0) { - gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); - } } gt_copy(q, a); From 6cd3777b7d9c818e473e27f2eb5d1f5007b8816b Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 16:37:07 +0200 Subject: [PATCH 08/18] Speedup for G1 in other curves. 
--- src/epx/relic_ep4_mul.c | 57 ++++++++++++++++----------- src/epx/relic_ep8_mul.c | 53 +++++++++++++++---------- src/low/x64-asm-8l/relic_bn_mul_low.c | 2 +- 3 files changed, 67 insertions(+), 45 deletions(-) diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index a6cb8a5ba..e010ac30f 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -82,23 +82,27 @@ static void ep4_psi(ep4_t r, const ep4_t p) { #if EP_MUL == LWNAF || !defined(STRIP) -static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { +static void ep4_mul_gls_imp(ep4_t r, const ep4_t p, const bn_t k) { size_t l, _l[8]; bn_t n, _k[8], u; int8_t naf[8][RLC_FP_BITS + 1]; - ep4_t q[8]; + ep4_t q, t[8][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep4_null(q); RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 8; i++) { + ep4_new(q); + for (size_t i = 0; i < 8; i++) { bn_null(_k[i]); - ep4_null(q[i]); bn_new(_k[i]); - ep4_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_null(t[i][j]); + ep4_new(t[i][j]); + } } ep4_curve_get_ord(n); @@ -106,34 +110,37 @@ static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 8, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep4_norm(q[0], p); - for (size_t i = 1; i < 8; i++) { - ep4_psi(q[i], q[i - 1]); - } -#if defined(EP_MIXED) - ep4_norm_sim(q + 1, q + 1, 7); -#endif - l = 0; for (size_t i = 0; i < 8; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep4_neg(q[i], q[i]); - } _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); + if (i == 0) { + ep4_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep4_neg(q, q); + } + ep4_tab(t[0], q, RLC_WIDTH); + } else { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_frb(t[i][j], t[i - 1][j], 1); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep4_neg(t[i][j], t[i][j]); + } + } + } } ep4_set_infty(r); for (int j = l - 1; j >= 0; 
j--) { ep4_dbl(r, r); - for (int i = 0; i < 8; i++) { + for (size_t i = 0; i < 8; i++) { if (naf[i][j] > 0) { - ep4_add(r, r, q[i]); + ep4_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep4_sub(r, r, q[i]); + ep4_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -147,11 +154,13 @@ static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 8; i++) { + ep4_free(q); + for (size_t i = 0; i < 8; i++) { bn_free(_k[i]); - ep4_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_free(t[i][j]); + } } - } } @@ -647,7 +656,7 @@ void ep4_mul_lwnaf(ep4_t r, const ep4_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep4_mul_glv_imp(r, p, k); + ep4_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/epx/relic_ep8_mul.c b/src/epx/relic_ep8_mul.c index 5300e8933..c741b7c68 100644 --- a/src/epx/relic_ep8_mul.c +++ b/src/epx/relic_ep8_mul.c @@ -40,23 +40,27 @@ #if EP_MUL == LWNAF || !defined(STRIP) -static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { +static void ep8_mul_gls_imp(ep8_t r, const ep8_t p, const bn_t k) { size_t l, _l[16]; bn_t n, _k[16], u; int8_t naf[16][RLC_FP_BITS + 1]; - ep8_t q[16]; + ep8_t q, t[16][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep8_null(q); RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 16; i++) { + ep8_new(q); + for (size_t i = 0; i < 16; i++) { bn_null(_k[i]); - ep8_null(q[i]); bn_new(_k[i]); - ep8_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_null(t[i][j]); + ep8_new(t[i][j]); + } } ep8_curve_get_ord(n); @@ -64,31 +68,37 @@ static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 16, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep8_norm(q[0], p); - for (size_t i = 1; i < 16; i++) { - ep8_frb(q[i], q[i - 1], 1); - } - l = 0; for (size_t i = 0; i < 16; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep8_neg(q[i], q[i]); - } _l[i] = 
RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); + if (i == 0) { + ep8_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep8_neg(q, q); + } + ep8_tab(t[0], q, RLC_WIDTH); + } else { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_frb(t[i][j], t[i - 1][j], 1); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep8_neg(t[i][j], t[i][j]); + } + } + } } ep8_set_infty(r); for (int j = l - 1; j >= 0; j--) { ep8_dbl(r, r); - for (int i = 0; i < 16; i++) { + for (size_t i = 0; i < 16; i++) { if (naf[i][j] > 0) { - ep8_add(r, r, q[i]); + ep8_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep8_sub(r, r, q[i]); + ep8_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -102,9 +112,12 @@ static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 16; i++) { + ep8_free(q); + for (size_t i = 0; i < 16; i++) { bn_free(_k[i]); - ep8_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_free(t[i][j]); + } } } } @@ -595,7 +608,7 @@ void ep8_mul_lwnaf(ep8_t r, const ep8_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep8_mul_glv_imp(r, p, k); + ep8_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/low/x64-asm-8l/relic_bn_mul_low.c b/src/low/x64-asm-8l/relic_bn_mul_low.c index 0839f9010..2c8c26e29 100644 --- a/src/low/x64-asm-8l/relic_bn_mul_low.c +++ b/src/low/x64-asm-8l/relic_bn_mul_low.c @@ -53,7 +53,7 @@ void bn_muln_low(dig_t *c, const dig_t *a, const dig_t *b, size_t size) { } void bn_muld_low(dig_t *c, const dig_t *a, size_t sa, const dig_t *b, size_t sb, - int low, int high) { + uint_t low, uint_t high) { (void)low; (void)high; mpn_mul(c, a, sa, b, sb); From e9e5b46f8dba728e3e98fffbc29d7b4da3afec6a Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 17:28:21 +0200 Subject: [PATCH 09/18] Revert slowdown for curves with expensive psi. 
--- src/pc/relic_pc_exp.c | 105 +++++++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 32 deletions(-) diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index d15208286..054f911a8 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -168,7 +168,6 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); l = 0; - gt_copy(t[0], a); for (size_t i = 0; i < f; i++) { s[i] = bn_sign(_b[i]); _l[i] = RLC_FP_BITS + 1; @@ -176,23 +175,45 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { l = RLC_MAX(l, _l[i]); } - gt_copy(q, a); - if (bn_sign(_b[0]) == RLC_NEG) { - gt_inv(q, q); - } - if (RLC_WIDTH > 2) { - gt_sqr(t[0], q); - gt_mul(t[1], t[0], q); - for (size_t j = 2; j < RLC_GT_TABLE; j++) { - gt_mul(t[j], t[j - 1], t[0]); + if (ep_curve_is_pairf() == EP_K16 || ep_curve_embed() == 18) { + gt_copy(t[0], a); + for (size_t i = 1; i < f; i++) { + gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); } - } - gt_copy(t[0], q); - for (size_t i = 1; i < f; i++) { - for (size_t j = 0; j < RLC_GT_TABLE; j++) { - gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); - if (s[i] != s[i - 1]) { - gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); + for (size_t i = 0; i < f; i++) { + gt_copy(q, t[i * RLC_GT_TABLE]); + if (s[i] == RLC_NEG) { + gt_inv(q, t[i * RLC_GT_TABLE]); + } + if (RLC_WIDTH > 2) { + gt_sqr(t[i * RLC_GT_TABLE], q); + gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], + t[i * (RLC_GT_TABLE)]); + } + } + gt_copy(t[i * RLC_GT_TABLE], q); + } + } else { + gt_copy(q, a); + if (bn_sign(_b[0]) == RLC_NEG) { + gt_inv(q, q); + } + if (RLC_WIDTH > 2) { + gt_sqr(t[0], q); + gt_mul(t[1], t[0], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[j], t[j - 1], t[0]); + } + } + gt_copy(t[0], q); + for 
(size_t i = 1; i < f; i++) { + for (size_t j = 0; j < RLC_GT_TABLE; j++) { + gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); + if (s[i] != s[i - 1]) { + gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); + } } } } @@ -310,22 +331,42 @@ void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { l = RLC_MAX(l, _l[i]); } - gt_copy(t[0], a); - gt_inv(q, t[0]); - gt_copy_sec(q, t[0], bn_sign(_b[0]) == RLC_POS); - if (RLC_WIDTH > 2) { - gt_sqr(t[0], q); - gt_mul(t[1], t[0], q); - for (size_t j = 2; j < RLC_GT_TABLE; j++) { - gt_mul(t[j], t[j - 1], t[0]); + if (ep_curve_is_pairf() == EP_K16 || ep_curve_embed() == 18) { + gt_copy(t[0], a); + for (size_t i = 1; i < f; i++) { + gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); } - } - gt_copy(t[0], q); - for (size_t i = 1; i < f; i++) { - for (size_t j = 0; j < RLC_GT_TABLE; j++) { - gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); - if (s[i] != s[i - 1]) { - gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); + for (size_t i = 0; i < f; i++) { + gt_inv(q, t[i * RLC_GT_TABLE]); + gt_copy_sec(q, t[i * RLC_GT_TABLE], s[i] == RLC_POS); + if (RLC_WIDTH > 2) { + gt_sqr(t[i * RLC_GT_TABLE], q); + gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], + t[i * (RLC_GT_TABLE)]); + } + } + gt_copy(t[i * RLC_GT_TABLE], q); + } + } else { + gt_copy(t[0], a); + gt_inv(q, t[0]); + gt_copy_sec(q, t[0], bn_sign(_b[0]) == RLC_POS); + if (RLC_WIDTH > 2) { + gt_sqr(t[0], q); + gt_mul(t[1], t[0], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[j], t[j - 1], t[0]); + } + } + gt_copy(t[0], q); + for (size_t i = 1; i < f; i++) { + for (size_t j = 0; j < RLC_GT_TABLE; j++) { + gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); + if (s[i] != s[i - 1]) { + gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); + } } } } From 
68f415bcb984832076b1d970b6ced0bbc0bfbed6 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Mon, 12 Aug 2024 17:32:15 +0200 Subject: [PATCH 10/18] Fix for k=16. --- src/epx/relic_ep4_mul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index e010ac30f..614a731b3 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -123,7 +123,7 @@ static void ep4_mul_gls_imp(ep4_t r, const ep4_t p, const bn_t k) { ep4_tab(t[0], q, RLC_WIDTH); } else { for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { - ep4_frb(t[i][j], t[i - 1][j], 1); + ep4_psi(t[i][j], t[i - 1][j]); if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { ep4_neg(t[i][j], t[i][j]); } From 025f36895bd739935ebd2159658ebe343466e5c2 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Tue, 13 Aug 2024 00:19:07 +0200 Subject: [PATCH 11/18] Speedups. --- src/epx/relic_ep3_mul.c | 84 ++++++++++++++++++++++++++--------------- src/epx/relic_ep4_mul.c | 20 +++++++--- 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/src/epx/relic_ep3_mul.c b/src/epx/relic_ep3_mul.c index b7d6cb37c..2f3cae7d5 100644 --- a/src/epx/relic_ep3_mul.c +++ b/src/epx/relic_ep3_mul.c @@ -86,11 +86,10 @@ static void ep3_psi(ep3_t r, const ep3_t p) { #if EP_MUL == LWNAF || !defined(STRIP) static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { - int i, j; - size_t l, _l[6]; + size_t l; bn_t n, _k[6], u; - int8_t naf[6][RLC_FP_BITS + 1]; - ep3_t q[6]; + int8_t even, col, sac[6 * (RLC_FP_BITS + 1)]; + ep3_t q[6], t[1 << 5]; bn_null(n); bn_null(u); @@ -98,13 +97,18 @@ static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { RLC_TRY { bn_new(n); bn_new(u); - for (i = 0; i < 6; i++) { + for (int i = 0; i < 6; i++) { bn_null(_k[i]); ep3_null(q[i]); bn_new(_k[i]); ep3_new(q[i]); } + for (int i = 0; i < (1 << 5); i++) { + ep3_null(t[i]); + ep3_new(t[i]); + } + ep3_curve_get_ord(n); fp_prime_get_par(u); if (ep_curve_is_pairf() == EP_SG18) { 
/* Compute base -3*u for the recoding below. */ @@ -112,41 +116,57 @@ static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { bn_add(u, u, n); bn_neg(u, u); } - ep3_curve_get_ord(n); bn_mod(_k[0], k, n); bn_rec_frb(_k, 6, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep3_norm(q[0], p); - for (int i = 1; i < 6; i++) { - ep3_psi(q[i], q[i - 1]); + for (size_t i = 0; i < 6; i++) { + ep3_neg(r, q[i]); + fp3_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); + _k[i]->sign = RLC_POS; + if (i > 0) { + ep3_psi(q[i], q[i - 1]); + } } -#if defined(EP_MIXED) - ep3_norm_sim(q + 1, q + 1, 5); -#endif + even = bn_is_even(_k[0]); + bn_add_dig(_k[0], _k[0], even); - l = 0; - for (i = 0; i < 6; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep3_neg(q[i], q[i]); - } - _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); - l = RLC_MAX(l, _l[i]); + ep3_copy(t[0], q[0]); + for (size_t i = 1; i < (1 << 5); i++) { + l = util_bits_dig(i); + ep3_add(t[i], t[i ^ (1 << (l - 1))], q[l]); } - /* We use w = 2 for the NAF because of the expensive endomomorphisms. */ + l = RLC_FP_BITS + 1; + bn_rec_sac(sac, &l, _k, 6, n); + +#if defined(EP_MIXED) + ep3_norm_sim(t + 1, t + 1, (1 << 5) - 1); + fp3_set_dig(r->z, 1); + fp3_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; +#else + r->coord = q[1]->coord = EP_ADD; +#endif + ep3_set_infty(r); - for (j = l - 1; j >= 0; j--) { + for (int j = l - 1; j >= 0; j--) { ep3_dbl(r, r); - for (i = 0; i < 6; i++) { - if (naf[i][j] > 0) { - ep3_add(r, r, q[i]); - } - if (naf[i][j] < 0) { - ep3_sub(r, r, q[i]); - } + col = 0; + for (int i = 5; i > 0; i--) { + col <<= 1; + col += sac[i * l + j]; } + + if (sac[j]) { + ep3_sub(r, r, t[col]); + } else { + ep3_add(r, r, t[col]); + } + } + + if (even) { + ep3_sub(r, r, q[0]); } /* Convert r to affine coordinates. 
*/ @@ -158,11 +178,13 @@ static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (i = 0; i < 3; i++) { + for (int i = 0; i < 6; i++) { bn_free(_k[i]); ep3_free(q[i]); } - + for (int i = 0; i < (1 << 5); i++) { + ep3_free(t[i]); + } } } diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index 614a731b3..ed667d738 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -115,13 +115,23 @@ static void ep4_mul_gls_imp(ep4_t r, const ep4_t p, const bn_t k) { _l[i] = RLC_FP_BITS + 1; bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); - if (i == 0) { - ep4_norm(q, p); - if (bn_sign(_k[0]) == RLC_NEG) { + } + ep4_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep4_neg(q, q); + } + ep4_tab(t[0], q, RLC_WIDTH); + + if (ep_curve_is_pairf() == EP_K16) { + for (size_t i = 1; i < 8; i++) { + ep4_psi(q, t[i - 1][0]); + if (bn_sign(_k[i]) == RLC_NEG) { ep4_neg(q, q); } - ep4_tab(t[0], q, RLC_WIDTH); - } else { + ep4_tab(t[i], q, RLC_WIDTH); + } + } else { + for (size_t i = 1; i < 8; i++) { for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { ep4_psi(t[i][j], t[i - 1][j]); if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { From b10be2f4a570d8a5f2ab4a83bfeb5814ca9303f5 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Tue, 13 Aug 2024 01:09:42 +0200 Subject: [PATCH 12/18] Fix. 
--- src/epx/relic_ep3_mul.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/epx/relic_ep3_mul.c b/src/epx/relic_ep3_mul.c index 2f3cae7d5..4cf4b2828 100644 --- a/src/epx/relic_ep3_mul.c +++ b/src/epx/relic_ep3_mul.c @@ -118,17 +118,18 @@ static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { } bn_mod(_k[0], k, n); bn_rec_frb(_k, 6, _k[0], u, n, ep_curve_is_pairf() == EP_BN); + even = bn_is_even(_k[0]); + bn_add_dig(_k[0], _k[0], even); ep3_norm(q[0], p); + for (size_t i = 1; i < 6; i++) { + ep3_psi(q[i], q[i - 1]); + } for (size_t i = 0; i < 6; i++) { - ep3_neg(r, q[i]); - fp3_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); - _k[i]->sign = RLC_POS; - if (i > 0) { - ep3_psi(q[i], q[i - 1]); + if (bn_sign(_k[i]) == RLC_NEG) { + ep3_neg(q[i], q[i]); } + bn_abs(_k[i], _k[i]); } - even = bn_is_even(_k[0]); - bn_add_dig(_k[0], _k[0], even); ep3_copy(t[0], q[0]); for (size_t i = 1; i < (1 << 5); i++) { @@ -225,17 +226,17 @@ static void ep3_mul_reg_gls(ep3_t r, const ep3_t p, const bn_t k) { } bn_mod(_k[0], k, n); bn_rec_frb(_k, 6, _k[0], u, n, ep_curve_is_pairf() == EP_BN); + even = bn_is_even(_k[0]); + bn_add_dig(_k[0], _k[0], even); ep3_norm(q[0], p); + for (size_t i = 1; i < 6; i++) { + ep3_psi(q[i], q[i - 1]); + } for (size_t i = 0; i < 6; i++) { ep3_neg(r, q[i]); fp3_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); - _k[i]->sign = RLC_POS; - if (i > 0) { - ep3_psi(q[i], q[i - 1]); - } + bn_abs(_k[i], _k[i]); } - even = bn_is_even(_k[0]); - bn_add_dig(_k[0], _k[0], even); ep3_copy(t[0], q[0]); for (size_t i = 1; i < (1 << 5); i++) { From 9db039b87a8f3e212c603a3e2a3264caf51e96a0 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Tue, 13 Aug 2024 02:17:16 +0200 Subject: [PATCH 13/18] Refactor GLV-SAC. 
--- include/relic_bn.h | 6 +- src/bn/relic_bn_rec.c | 4 +- src/epx/relic_ep2_mul.c | 4 +- src/epx/relic_ep3_mul.c | 4 +- src/epx/relic_ep4_mul.c | 191 ++++++++++++++++++++++++++-------------- test/test_bn.c | 2 +- 6 files changed, 133 insertions(+), 78 deletions(-) diff --git a/include/relic_bn.h b/include/relic_bn.h index bbfa6e740..a4fb0e278 100644 --- a/include/relic_bn.h +++ b/include/relic_bn.h @@ -1547,11 +1547,11 @@ void bn_rec_frb(bn_t *ki, int sub, const bn_t k, const bn_t x, const bn_t n, * @param[out] b - the recoded subscalars. * @param[in] len - the length in bytes of the recoding. * @param[in] k - the subscalars to recode. - * @param[in] m - the number of subscallars to recode. - * @param[in] n - the elliptic curve group order. + * @param[in] m - the number of subscalars to recode. + * @param[in] n - the bit length of the group order. * @throw ERR_NO_BUFFER - if the buffer capacity is insufficient. */ -void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, bn_t n); +void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, size_t n); /** * Computes the coefficients of the polynomial representing the Lagrange diff --git a/src/bn/relic_bn_rec.c b/src/bn/relic_bn_rec.c index 2f8b2c210..2988a4092 100644 --- a/src/bn/relic_bn_rec.c +++ b/src/bn/relic_bn_rec.c @@ -876,10 +876,10 @@ void bn_rec_glv(bn_t k0, bn_t k1, const bn_t k, const bn_t n, const bn_t *v1, } } -void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, bn_t n) { +void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, size_t n) { /* Assume k0 is the sign-aligner. 
*/ bn_t *t = RLC_ALLOCA(bn_t, m); - size_t l = RLC_CEIL(bn_bits(n), m) + 1; + size_t l = RLC_CEIL(n, m) + 1; int8_t bji; if (t == NULL) { diff --git a/src/epx/relic_ep2_mul.c b/src/epx/relic_ep2_mul.c index 5533c5cb9..b689b4658 100644 --- a/src/epx/relic_ep2_mul.c +++ b/src/epx/relic_ep2_mul.c @@ -172,7 +172,7 @@ static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) { } l = RLC_FP_BITS + 1; - bn_rec_sac(sac, &l, _k, 4, n); + bn_rec_sac(sac, &l, _k, 4, bn_bits(n)); #if defined(EP_MIXED) ep2_norm_sim(t + 1, t + 1, (1 << 3) - 1); @@ -197,7 +197,7 @@ static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) { } ep2_neg(q[1], r); - fp2_copy_sec(r->y, q[1]->y, sac[l - 1] != 0); + fp2_copy_sec(r->y, q[1]->y, sac[l - 1]); for (int j = l - 2; j >= 0; j--) { ep2_dbl(r, r); diff --git a/src/epx/relic_ep3_mul.c b/src/epx/relic_ep3_mul.c index 4cf4b2828..b09ca701f 100644 --- a/src/epx/relic_ep3_mul.c +++ b/src/epx/relic_ep3_mul.c @@ -138,7 +138,7 @@ static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { } l = RLC_FP_BITS + 1; - bn_rec_sac(sac, &l, _k, 6, n); + bn_rec_sac(sac, &l, _k, 6, bn_bits(n)); #if defined(EP_MIXED) ep3_norm_sim(t + 1, t + 1, (1 << 5) - 1); @@ -245,7 +245,7 @@ static void ep3_mul_reg_gls(ep3_t r, const ep3_t p, const bn_t k) { } l = RLC_FP_BITS + 1; - bn_rec_sac(sac, &l, _k, 6, n); + bn_rec_sac(sac, &l, _k, 6, bn_bits(n)); #if defined(EP_MIXED) ep3_norm_sim(t + 1, t + 1, (1 << 5) - 1); diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index ed667d738..92e951dee 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -179,28 +179,29 @@ static void ep4_mul_gls_imp(ep4_t r, const ep4_t p, const bn_t k) { #if EP_MUL == LWREG || !defined(STRIP) static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { - int8_t reg[8][RLC_FP_BITS + 1], b[8], s[8], c0, n0; - ep4_t q, w, t[8][1 << (RLC_WIDTH - 2)]; + size_t l; bn_t n, _k[8], u; - size_t l, len, _l[8]; + int8_t even0, even1, col; + int8_t sac0[4 
* (RLC_FP_BITS + 1)], sac1[4 * (RLC_FP_BITS + 1)]; + ep4_t q[8], t0[1 << 3], t1[1 << 3] bn_null(n); bn_null(u); - ep4_null(q); - ep4_null(w); RLC_TRY { bn_new(n); bn_new(u); - ep4_new(q); - ep4_new(w); - for (size_t i = 0; i < 8; i++) { + for (int i = 0; i < 8; i++) { bn_null(_k[i]); + ep4_null(q[i]); bn_new(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { - ep4_null(t[i][j]); - ep4_new(t[i][j]); - } + ep4_new(q[i]); + } + for (int i = 0; i < (1 << 3); i++) { + ep4_null(t0[i]); + ep4_new(t0[i]); + ep4_null(t1[i]); + ep4_new(t1[i]); } ep4_curve_get_ord(n); @@ -208,71 +209,125 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 8, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - l = 0; - /* Make some extra room for BN curves that grow subscalars by 1. */ - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); - ep4_norm(t[0][0], p); + even0 = bn_is_even(_k[0]); + bn_add_dig(_k[0], _k[0], even0); + even1 = bn_is_even(_k[4]); + bn_add_dig(_k[4], _k[4], even1); + + ep4_norm(q[0], p); + for (size_t i = 1; i < 8; i++) { + ep4_psi(q[i], q[i - 1]); + } for (size_t i = 0; i < 8; i++) { - s[i] = bn_sign(_k[i]); + ep4_neg(r, q[i]); + fp4_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); bn_abs(_k[i], _k[i]); - b[i] = bn_is_even(_k[i]); - _k[i]->dp[0] |= b[i]; - - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg[i], &_l[i], _k[i], len, RLC_WIDTH); - l = RLC_MAX(l, _l[i]); - - /* Apply Frobenius before flipping sign to build table. 
*/ - if (i > 0) { - ep4_psi(t[i][0], t[i - 1][0]); - } } - for (size_t i = 0; i < 8; i++) { - ep4_neg(q, t[i][0]); - fp4_copy_sec(q->y, t[i][0]->y, s[i] == RLC_POS); - ep4_tab(t[i], q, RLC_WIDTH); + ep4_copy(t0[0], q[0]); + for (size_t i = 1; i < (1 << 3); i++) { + l = util_bits_dig(i); + ep4_add(t0[i], t0[i ^ (1 << (l - 1))], q[l]); } + ep4_copy(t1[0], q[4]); + for (size_t i = 1; i < (1 << 3); i++) { + l = util_bits_dig(i); + ep4_add(t1[i], t1[i ^ (1 << (l - 1))], q[4 + l]); + } + + l = RLC_FP_BITS + 1; + bn_rec_sac(sac0, &l, _k, 4, bn_bits(n)); + l = RLC_FP_BITS + 1; + bn_rec_sac(sac1, &l, _k + 4, 4, bn_bits(n)); #if defined(EP_MIXED) - fp4_set_dig(w->z, 1); - w->coord = BASIC; + ep4_norm_sim(t0 + 1, t0 + 1, (1 << 3) - 1); + ep4_norm_sim(t1 + 1, t1 + 1, (1 << 3) - 1); + fp4_set_dig(r->z, 1); + fp4_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; #else - w->coord = = EP_ADD; + r->coord = q[1]->coord = EP_ADD; #endif - ep4_set_infty(r); - for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - ep4_dbl(r, r); - } + col = 0; + for (int i = 3; i > 0; i--) { + col <<= 1; + col += sac0[i * l + l - 1]; + } + for (size_t m = 0; m < (1 << 3); m++) { + fp4_copy_sec(r->x, t0[m]->x, m == col); + fp4_copy_sec(r->y, t0[m]->y, m == col); +#if !defined(EP_MIXED) + fp4_copy_sec(r->z, t0[m]->z, m == col); +#endif + } + ep4_neg(q[1], r); + fp4_copy_sec(r->y, q[1]->y, sac0[l - 1]); - for (size_t i = 0; i < 8; i++) { - n0 = reg[i][j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < (1 << (RLC_WIDTH - 2)); m++) { - fp4_copy_sec(w->x, t[i][m]->x, m == n0); - fp4_copy_sec(w->y, t[i][m]->y, m == n0); - #if !defined(EP_MIXED) - fp4_copy_sec(w->z, t[i][m]->z, m == n0); - #endif - } + col = 0; + for (int i = 3; i > 0; i--) { + col <<= 1; + col += sac1[i * l + l - 1]; + } + for (size_t m = 0; m < (1 << 3); m++) { + fp4_copy_sec(q[1]->x, t1[m]->x, m == col); + fp4_copy_sec(q[1]->y, t1[m]->y, m == col); +#if !defined(EP_MIXED) + 
fp4_copy_sec(q[1]->z, t1[m]->z, m == col); +#endif + } + ep4_neg(q[2], q[1]); + fp4_copy_sec(q[1]->y, q[2]->y, sac1[l - 1]); + ep4_add(r, r, q[1]); + + for (int j = l - 2; j >= 0; j--) { + ep4_dbl(r, r); - ep4_neg(q, w); - fp4_copy_sec(q->y, w->y, c0 == 0); - ep4_add(r, r, q); + col = 0; + for (int i = 3; i > 0; i--) { + col <<= 1; + col += sac0[i * l + j]; + } + + for (size_t m = 0; m < (1 << 3); m++) { + fp4_copy_sec(q[1]->x, t0[m]->x, m == col); + fp4_copy_sec(q[1]->y, t0[m]->y, m == col); +#if !defined(EP_MIXED) + fp4_copy_sec(q[1]->z, t0[m]->z, m == col); +#endif } + ep4_neg(q[2], q[1]); + fp4_copy_sec(q[1]->y, q[2]->y, sac0[j]); + ep4_add(r, r, q[1]); + + col = 0; + for (int i = 3; i > 0; i--) { + col <<= 1; + col += sac1[i * l + j]; + } + + for (size_t m = 0; m < (1 << 3); m++) { + fp4_copy_sec(q[1]->x, t1[m]->x, m == col); + fp4_copy_sec(q[1]->y, t1[m]->y, m == col); +#if !defined(EP_MIXED) + fp4_copy_sec(q[1]->z, t1[m]->z, m == col); +#endif + } + ep4_neg(q[2], q[1]); + fp4_copy_sec(q[1]->y, q[2]->y, sac1[j]); + ep4_add(r, r, q[1]); } - for (size_t i = 0; i < 8; i++) { - /* Tables are built with points already negated, so no need here. */ - ep4_sub(q, r, t[i][0]); - fp4_copy_sec(r->x, q->x, b[i]); - fp4_copy_sec(r->y, q->y, b[i]); - fp4_copy_sec(r->z, q->z, b[i]); - } + ep4_sub(q[1], r, q[0]); + fp4_copy_sec(r->x, q[1]->x, even0); + fp4_copy_sec(r->y, q[1]->y, even0); + fp4_copy_sec(r->z, q[1]->z, even0); + + ep4_sub(q[1], r, q[4]); + fp4_copy_sec(r->x, q[1]->x, even1); + fp4_copy_sec(r->y, q[1]->y, even1); + fp4_copy_sec(r->z, q[1]->z, even1); /* Convert r to affine coordinates. 
*/ ep4_norm(r, r); @@ -283,13 +338,13 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - ep4_free(q); - ep4_free(w); - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 7; i++) { bn_free(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { - ep4_free(t[i][j]); - } + ep4_free(q[i]); + } + for (int i = 0; i < (1 << 3); i++) { + ep4_free(t0[i]); + ep4_free(t1[i]); } } } diff --git a/test/test_bn.c b/test/test_bn.c index 5ac90f83b..70d37a994 100644 --- a/test/test_bn.c +++ b/test/test_bn.c @@ -2292,7 +2292,7 @@ static int recoding(void) { bn_rand_mod(a, b); bn_rec_glv(b, c, a, b, (const bn_t *)v1, (const bn_t *)v2); ep_curve_get_ord(v2[0]); - bn_rec_sac(ptr, &l, v1, 2, v2[0]); + bn_rec_sac(ptr, &l, v1, 2, bn_bits(v2[0])); if (bn_is_even(b)) { bn_add_dig(b, b, 1); } From 76d59a766842e91ad82a4c25987da9bb81191f48 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Wed, 14 Aug 2024 01:07:29 +0200 Subject: [PATCH 14/18] Refactor for generality. 
--- src/epx/relic_ep4_mul.c | 157 ++++++++++++++++------------------------ src/epx/relic_ep8_mul.c | 138 ++++++++++++++++++++--------------- 2 files changed, 140 insertions(+), 155 deletions(-) diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index 92e951dee..914ec44e0 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -179,11 +179,10 @@ static void ep4_mul_gls_imp(ep4_t r, const ep4_t p, const bn_t k) { #if EP_MUL == LWREG || !defined(STRIP) static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { - size_t l; + size_t l, c = 2, m = 8; bn_t n, _k[8], u; - int8_t even0, even1, col; - int8_t sac0[4 * (RLC_FP_BITS + 1)], sac1[4 * (RLC_FP_BITS + 1)]; - ep4_t q[8], t0[1 << 3], t1[1 << 3] + int8_t even[2], col, sac[2][4 * (RLC_FP_BITS + 1)]; + ep4_t q[8], t[2][1 << 3]; bn_null(n); bn_null(u); @@ -197,11 +196,11 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { bn_new(_k[i]); ep4_new(q[i]); } - for (int i = 0; i < (1 << 3); i++) { - ep4_null(t0[i]); - ep4_new(t0[i]); - ep4_null(t1[i]); - ep4_new(t1[i]); + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (j << 3); i++) { + ep4_null(t[i][j]); + ep4_new(t[i][j]); + } } ep4_curve_get_ord(n); @@ -209,11 +208,11 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 8, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - even0 = bn_is_even(_k[0]); - bn_add_dig(_k[0], _k[0], even0); - even1 = bn_is_even(_k[4]); - bn_add_dig(_k[4], _k[4], even1); - + for (size_t i = 0; i < c; i++) { + even[i] = bn_is_even(_k[i * m / c]); + bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); + } + ep4_norm(q[0], p); for (size_t i = 1; i < 8; i++) { ep4_psi(q[i], q[i - 1]); @@ -224,25 +223,20 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { bn_abs(_k[i], _k[i]); } - ep4_copy(t0[0], q[0]); - for (size_t i = 1; i < (1 << 3); i++) { - l = util_bits_dig(i); - ep4_add(t0[i], t0[i ^ (1 << (l - 1))], q[l]); - } - 
ep4_copy(t1[0], q[4]); - for (size_t i = 1; i < (1 << 3); i++) { - l = util_bits_dig(i); - ep4_add(t1[i], t1[i ^ (1 << (l - 1))], q[4 + l]); + for (size_t i = 0; i < c; i++) { + ep4_copy(t[i][0], q[i * m / c]); + for (size_t j = 1; j < (1 << 3); j++) { + l = util_bits_dig(j); + ep4_add(t[i][j], t[i][j ^ (1 << (l - 1))], q[l + i * m / c]); + } + l = RLC_FP_BITS + 1; + bn_rec_sac(sac[i], &l, _k + i * m / c, m / c, bn_bits(n)); } - l = RLC_FP_BITS + 1; - bn_rec_sac(sac0, &l, _k, 4, bn_bits(n)); - l = RLC_FP_BITS + 1; - bn_rec_sac(sac1, &l, _k + 4, 4, bn_bits(n)); - #if defined(EP_MIXED) - ep4_norm_sim(t0 + 1, t0 + 1, (1 << 3) - 1); - ep4_norm_sim(t1 + 1, t1 + 1, (1 << 3) - 1); + for (size_t i = 0; i < c; i++) { + ep4_norm_sim(t[i] + 1, t[i] + 1, (1 << 3) - 1); + } fp4_set_dig(r->z, 1); fp4_set_dig(q[1]->z, 1); r->coord = q[1]->coord = BASIC; @@ -250,84 +244,54 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { r->coord = q[1]->coord = EP_ADD; #endif - col = 0; - for (int i = 3; i > 0; i--) { - col <<= 1; - col += sac0[i * l + l - 1]; - } - for (size_t m = 0; m < (1 << 3); m++) { - fp4_copy_sec(r->x, t0[m]->x, m == col); - fp4_copy_sec(r->y, t0[m]->y, m == col); -#if !defined(EP_MIXED) - fp4_copy_sec(r->z, t0[m]->z, m == col); -#endif - } - ep4_neg(q[1], r); - fp4_copy_sec(r->y, q[1]->y, sac0[l - 1]); - - col = 0; - for (int i = 3; i > 0; i--) { - col <<= 1; - col += sac1[i * l + l - 1]; - } - for (size_t m = 0; m < (1 << 3); m++) { - fp4_copy_sec(q[1]->x, t1[m]->x, m == col); - fp4_copy_sec(q[1]->y, t1[m]->y, m == col); -#if !defined(EP_MIXED) - fp4_copy_sec(q[1]->z, t1[m]->z, m == col); -#endif - } - ep4_neg(q[2], q[1]); - fp4_copy_sec(q[1]->y, q[2]->y, sac1[l - 1]); - ep4_add(r, r, q[1]); - - for (int j = l - 2; j >= 0; j--) { - ep4_dbl(r, r); - + ep4_set_infty(r); + for (size_t i = 0; i < c; i++) { col = 0; - for (int i = 3; i > 0; i--) { + for (int j = 3; j > 0; j--) { col <<= 1; - col += sac0[i * l + j]; + col += sac[i][j * l + l - 1]; } - for 
(size_t m = 0; m < (1 << 3); m++) { - fp4_copy_sec(q[1]->x, t0[m]->x, m == col); - fp4_copy_sec(q[1]->y, t0[m]->y, m == col); + fp4_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp4_copy_sec(q[1]->y, t[i][m]->y, m == col); #if !defined(EP_MIXED) - fp4_copy_sec(q[1]->z, t0[m]->z, m == col); + fp4_copy_sec(q[1]->z, t[i][m]->z, m == col); #endif } ep4_neg(q[2], q[1]); - fp4_copy_sec(q[1]->y, q[2]->y, sac0[j]); + fp4_copy_sec(q[1]->y, q[2]->y, sac[i][l - 1]); ep4_add(r, r, q[1]); + } - col = 0; - for (int i = 3; i > 0; i--) { - col <<= 1; - col += sac1[i * l + j]; - } + for (int j = l - 2; j >= 0; j--) { + ep4_dbl(r, r); + + for (size_t i = 0; i < c; i++) { + col = 0; + for (int k = 3; k > 0; k--) { + col <<= 1; + col += sac[i][k * l + j]; + } - for (size_t m = 0; m < (1 << 3); m++) { - fp4_copy_sec(q[1]->x, t1[m]->x, m == col); - fp4_copy_sec(q[1]->y, t1[m]->y, m == col); + for (size_t m = 0; m < (1 << 3); m++) { + fp4_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp4_copy_sec(q[1]->y, t[i][m]->y, m == col); #if !defined(EP_MIXED) - fp4_copy_sec(q[1]->z, t1[m]->z, m == col); + fp4_copy_sec(q[1]->z, t[i][m]->z, m == col); #endif + } + ep4_neg(q[2], q[1]); + fp4_copy_sec(q[1]->y, q[2]->y, sac[i][j]); + ep4_add(r, r, q[1]); } - ep4_neg(q[2], q[1]); - fp4_copy_sec(q[1]->y, q[2]->y, sac1[j]); - ep4_add(r, r, q[1]); } - ep4_sub(q[1], r, q[0]); - fp4_copy_sec(r->x, q[1]->x, even0); - fp4_copy_sec(r->y, q[1]->y, even0); - fp4_copy_sec(r->z, q[1]->z, even0); - - ep4_sub(q[1], r, q[4]); - fp4_copy_sec(r->x, q[1]->x, even1); - fp4_copy_sec(r->y, q[1]->y, even1); - fp4_copy_sec(r->z, q[1]->z, even1); + for (size_t i = 0; i < c; i++) { + ep4_sub(q[1], r, q[i * m / c]); + fp4_copy_sec(r->x, q[1]->x, even[i]); + fp4_copy_sec(r->y, q[1]->y, even[i]); + fp4_copy_sec(r->z, q[1]->z, even[i]); + } /* Convert r to affine coordinates. 
*/ ep4_norm(r, r); @@ -338,13 +302,14 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 7; i++) { + for (int i = 0; i < 8; i++) { bn_free(_k[i]); ep4_free(q[i]); } - for (int i = 0; i < (1 << 3); i++) { - ep4_free(t0[i]); - ep4_free(t1[i]); + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (j << 3); i++) { + ep4_free(t[i][j]); + } } } } diff --git a/src/epx/relic_ep8_mul.c b/src/epx/relic_ep8_mul.c index c741b7c68..92d855573 100644 --- a/src/epx/relic_ep8_mul.c +++ b/src/epx/relic_ep8_mul.c @@ -127,25 +127,25 @@ static void ep8_mul_gls_imp(ep8_t r, const ep8_t p, const bn_t k) { #if EP_MUL == LWREG || !defined(STRIP) static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { - int8_t reg[16][RLC_FP_BITS + 1], b[16], s[16], c0, n0; - ep8_t q, w, t[16][1 << (RLC_WIDTH - 2)]; + size_t l, c = 4, m = 16; bn_t n, _k[16], u; - size_t l, len, _l[16]; + int8_t even[4], col, sac[4][4 * (RLC_FP_BITS + 1)]; + ep8_t q[16], t[4][1 << 3]; bn_null(n); bn_null(u); - ep8_null(q); - ep8_null(w); RLC_TRY { bn_new(n); bn_new(u); - ep8_new(q); - ep8_new(w); - for (size_t i = 0; i < 16; i++) { + for (int i = 0; i < 16; i++) { bn_null(_k[i]); + ep8_null(q[i]); bn_new(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_new(q[i]); + } + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (j << 3); i++) { ep8_null(t[i][j]); ep8_new(t[i][j]); } @@ -156,70 +156,89 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 16, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - l = 0; - /* Make some extra room for BN curves that grow subscalars by 1. 
*/ - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); - ep8_norm(t[0][0], p); + for (size_t i = 0; i < c; i++) { + even[i] = bn_is_even(_k[i * m / c]); + bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); + } + + ep8_norm(q[0], p); + for (size_t i = 1; i < 16; i++) { + ep8_frb(q[i], q[i - 1], 1); + } for (size_t i = 0; i < 16; i++) { - s[i] = bn_sign(_k[i]); + ep8_neg(r, q[i]); + fp8_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); bn_abs(_k[i], _k[i]); - b[i] = bn_is_even(_k[i]); - _k[i]->dp[0] |= b[i]; - - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg[i], &_l[i], _k[i], len, RLC_WIDTH); - l = RLC_MAX(l, _l[i]); - - /* Apply Frobenius before flipping sign to build table. */ - if (i > 0) { - ep8_frb(t[i][0], t[i - 1][0], 1); - } } - for (size_t i = 0; i < 16; i++) { - ep8_neg(q, t[i][0]); - fp8_copy_sec(q->y, t[i][0]->y, s[i] == RLC_POS); - ep8_tab(t[i], q, RLC_WIDTH); + for (size_t i = 0; i < c; i++) { + ep8_copy(t[i][0], q[i * m / c]); + for (size_t j = 1; j < (1 << 3); j++) { + l = util_bits_dig(j); + ep8_add(t[i][j], t[i][j ^ (1 << (l - 1))], q[l + i * m / c]); + } + l = RLC_FP_BITS + 1; + bn_rec_sac(sac[i], &l, _k + i * m / c, m / c, bn_bits(n)); } #if defined(EP_MIXED) - fp8_set_dig(w->z, 1); - w->coord = BASIC; + for (size_t i = 0; i < c; i++) { + ep8_norm_sim(t[i] + 1, t[i] + 1, (1 << 3) - 1); + } + fp8_set_dig(r->z, 1); + fp8_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; #else - w->coord = = EP_ADD; + r->coord = q[1]->coord = EP_ADD; #endif ep8_set_infty(r); - for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - ep8_dbl(r, r); + for (size_t i = 0; i < c; i++) { + col = 0; + for (int j = 3; j > 0; j--) { + col <<= 1; + col += sac[i][j * l + l - 1]; } + for (size_t m = 0; m < (1 << 3); m++) { + fp8_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp8_copy_sec(q[1]->y, t[i][m]->y, m == col); +#if !defined(EP_MIXED) + fp8_copy_sec(q[1]->z, t[i][m]->z, m == col); +#endif + } + ep8_neg(q[2], q[1]); + fp8_copy_sec(q[1]->y, 
q[2]->y, sac[i][l - 1]); + ep8_add(r, r, q[1]); + } - for (size_t i = 0; i < 16; i++) { - n0 = reg[i][j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < (1 << (RLC_WIDTH - 2)); m++) { - fp8_copy_sec(w->x, t[i][m]->x, m == n0); - fp8_copy_sec(w->y, t[i][m]->y, m == n0); - #if !defined(EP_MIXED) - fp8_copy_sec(w->z, t[i][m]->z, m == n0); - #endif - } + for (int j = l - 2; j >= 0; j--) { + ep8_dbl(r, r); - ep8_neg(q, w); - fp8_copy_sec(q->y, w->y, c0 == 0); - ep8_add(r, r, q); + for (size_t i = 0; i < c; i++) { + col = 0; + for (int k = 3; k > 0; k--) { + col <<= 1; + col += sac[i][k * l + j]; + } + + for (size_t m = 0; m < (1 << 3); m++) { + fp8_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp8_copy_sec(q[1]->y, t[i][m]->y, m == col); +#if !defined(EP_MIXED) + fp8_copy_sec(q[1]->z, t[i][m]->z, m == col); +#endif + } + ep8_neg(q[2], q[1]); + fp8_copy_sec(q[1]->y, q[2]->y, sac[i][j]); + ep8_add(r, r, q[1]); } } - for (size_t i = 0; i < 16; i++) { - /* Tables are built with points already negated, so no need here. */ - ep8_sub(q, r, t[i][0]); - fp8_copy_sec(r->x, q->x, b[i]); - fp8_copy_sec(r->y, q->y, b[i]); - fp8_copy_sec(r->z, q->z, b[i]); + for (size_t i = 0; i < c; i++) { + ep8_sub(q[1], r, q[i * m / c]); + fp8_copy_sec(r->x, q[1]->x, even[i]); + fp8_copy_sec(r->y, q[1]->y, even[i]); + fp8_copy_sec(r->z, q[1]->z, even[i]); } /* Convert r to affine coordinates. */ @@ -231,11 +250,12 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - ep8_free(q); - ep8_free(w); for (int i = 0; i < 16; i++) { bn_free(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_free(q[i]); + } + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (j << 3); i++) { ep8_free(t[i][j]); } } From 7193ea68b6723253c0987f9e68eb089b148b0e6a Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Wed, 14 Aug 2024 13:56:53 +0200 Subject: [PATCH 15/18] Fixes. 
--- src/epx/relic_ep2_mul.c | 4 +- src/epx/relic_ep4_mul.c | 15 ++-- src/epx/relic_ep8_mul.c | 15 ++-- src/pc/relic_pc_exp.c | 177 +++++++++++++++----------------------- src/pp/relic_pp_map_k54.c | 1 - 5 files changed, 83 insertions(+), 129 deletions(-) diff --git a/src/epx/relic_ep2_mul.c b/src/epx/relic_ep2_mul.c index b689b4658..7da80588f 100644 --- a/src/epx/relic_ep2_mul.c +++ b/src/epx/relic_ep2_mul.c @@ -129,7 +129,7 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) { size_t l; bn_t n, _k[4], u; - int8_t even, col, sac[4 * (RLC_FP_BITS + 1)]; + int8_t even, col, sac[4 * RLC_FP_BITS]; ep2_t q[4], t[1 << 3]; bn_null(n); @@ -171,7 +171,7 @@ static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) { ep2_add(t[i], t[i ^ (1 << (l - 1))], q[l]); } - l = RLC_FP_BITS + 1; + l = RLC_FP_BITS; bn_rec_sac(sac, &l, _k, 4, bn_bits(n)); #if defined(EP_MIXED) diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index 914ec44e0..de4d207d6 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -197,7 +197,7 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { ep4_new(q[i]); } for (size_t i = 0; i < c; i++) { - for (int j = 0; j < (j << 3); i++) { + for (int j = 0; j < (1 << 3); j++) { ep4_null(t[i][j]); ep4_new(t[i][j]); } @@ -208,11 +208,6 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 8, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - for (size_t i = 0; i < c; i++) { - even[i] = bn_is_even(_k[i * m / c]); - bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); - } - ep4_norm(q[0], p); for (size_t i = 1; i < 8; i++) { ep4_psi(q[i], q[i - 1]); @@ -222,7 +217,11 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { fp4_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); bn_abs(_k[i], _k[i]); } - + for (size_t i = 0; i < c; i++) { + even[i] = bn_is_even(_k[i * m / c]); + 
bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); + } + for (size_t i = 0; i < c; i++) { ep4_copy(t[i][0], q[i * m / c]); for (size_t j = 1; j < (1 << 3); j++) { @@ -307,7 +306,7 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { ep4_free(q[i]); } for (size_t i = 0; i < c; i++) { - for (int j = 0; j < (j << 3); i++) { + for (int j = 0; j < (1 << 3); j++) { ep4_free(t[i][j]); } } diff --git a/src/epx/relic_ep8_mul.c b/src/epx/relic_ep8_mul.c index 92d855573..6ca09592e 100644 --- a/src/epx/relic_ep8_mul.c +++ b/src/epx/relic_ep8_mul.c @@ -145,7 +145,7 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { ep8_new(q[i]); } for (size_t i = 0; i < c; i++) { - for (int j = 0; j < (j << 3); i++) { + for (int j = 0; j < (1 << 3); j++) { ep8_null(t[i][j]); ep8_new(t[i][j]); } @@ -156,11 +156,6 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 16, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - for (size_t i = 0; i < c; i++) { - even[i] = bn_is_even(_k[i * m / c]); - bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); - } - ep8_norm(q[0], p); for (size_t i = 1; i < 16; i++) { ep8_frb(q[i], q[i - 1], 1); @@ -170,7 +165,11 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { fp8_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); bn_abs(_k[i], _k[i]); } - + for (size_t i = 0; i < c; i++) { + even[i] = bn_is_even(_k[i * m / c]); + bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); + } + for (size_t i = 0; i < c; i++) { ep8_copy(t[i][0], q[i * m / c]); for (size_t j = 1; j < (1 << 3); j++) { @@ -255,7 +254,7 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { ep8_free(q[i]); } for (size_t i = 0; i < c; i++) { - for (int j = 0; j < (j << 3); i++) { + for (int j = 0; j < (1 << 3); j++) { ep8_free(t[i][j]); } } diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 054f911a8..33307a7f7 100644 --- a/src/pc/relic_pc_exp.c +++ 
b/src/pc/relic_pc_exp.c @@ -263,47 +263,39 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { * @param[in] b - the exponent. * @param[in] f - the maximum Frobenius power. */ -void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { - int8_t c0, n0, *reg = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); - int8_t *e = RLC_ALLOCA(int8_t, f), *s = RLC_ALLOCA(int8_t, f); - gt_t q, w, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); - bn_t n, u, *_b = RLC_ALLOCA(bn_t, f); - size_t l, len, *_l = RLC_ALLOCA(size_t, f); - - if (reg == NULL || e == NULL || t == NULL || _b == NULL || _l == NULL) { +static void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t d, + size_t f) { + size_t l, s = (1 << (f / d - 1)); + bn_t n, *_b = RLC_ALLOCA(bn_t, f), u; + int8_t col, *e = RLC_ALLOCA(int8_t, d); + int8_t *sac = RLC_ALLOCA(int8_t, d * f * RLC_FP_BITS); + gt_t *q = RLC_ALLOCA(gt_t, f), *t = RLC_ALLOCA(gt_t, d * s); + + if (sac == NULL || e == NULL || t == NULL || _b == NULL || q == NULL) { RLC_THROW(ERR_NO_MEMORY); return; } - if (bn_is_zero(b)) { - RLC_FREE(reg); - RLC_FREE(e); - RLC_FREE(s); - RLC_FREE(t); - RLC_FREE(_b); - RLC_FREE(_l); - return gt_set_unity(c); - } - bn_null(n); bn_null(u); - gt_null(q); - gt_null(w); RLC_TRY { bn_new(n); bn_new(u); - gt_new(q); - gt_new(w); - for (size_t i = 0; i < f; i++) { + for (int i = 0; i < f; i++) { bn_null(_b[i]); + gt_null(q[i]); bn_new(_b[i]); - for (size_t j = 0; j < RLC_GT_TABLE; j++) { - gt_null(t[i * RLC_GT_TABLE + j]); - gt_new(t[i * RLC_GT_TABLE + j]); + gt_new(q[i]); + } + for (size_t i = 0; i < d; i++) { + for (int j = 0; j < s; j++) { + gt_null(t[i * s + j]); + gt_new(t[i * s + j]); } } + gt_get_ord(n); fp_prime_get_par(u); if (ep_curve_is_pairf() == EP_SG18) { /* Compute base -3*u for the recoding below. 
*/ @@ -311,93 +303,56 @@ void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_add(u, u, n); bn_neg(u, u); } - gt_get_ord(n); - bn_abs(_b[0], b); - bn_mod(_b[0], _b[0], n); - if (bn_sign(b) == RLC_NEG) { - bn_neg(_b[0], _b[0]); - } + bn_mod(_b[0], b, n); bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); - l = 0; - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); + gt_copy(q[0], a); + for (size_t i = 1; i < f; i++) { + gt_psi(q[i], q[i - 1]); + } for (size_t i = 0; i < f; i++) { - s[i] = bn_sign(_b[i]); - e[i] = bn_is_even(_b[i]); - _b[i]->dp[0] |= e[i]; - - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg + i * (RLC_FP_BITS + 1), &_l[i], _b[i], len, RLC_WIDTH); - l = RLC_MAX(l, _l[i]); + gt_inv(c, q[i]); + gt_copy_sec(q[i], c, bn_sign(_b[i]) == RLC_NEG); + bn_abs(_b[i], _b[i]); } - - if (ep_curve_is_pairf() == EP_K16 || ep_curve_embed() == 18) { - gt_copy(t[0], a); - for (size_t i = 1; i < f; i++) { - gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); - } - for (size_t i = 0; i < f; i++) { - gt_inv(q, t[i * RLC_GT_TABLE]); - gt_copy_sec(q, t[i * RLC_GT_TABLE], s[i] == RLC_POS); - if (RLC_WIDTH > 2) { - gt_sqr(t[i * RLC_GT_TABLE], q); - gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); - for (size_t j = 2; j < RLC_GT_TABLE; j++) { - gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], - t[i * (RLC_GT_TABLE)]); - } - } - gt_copy(t[i * RLC_GT_TABLE], q); - } - } else { - gt_copy(t[0], a); - gt_inv(q, t[0]); - gt_copy_sec(q, t[0], bn_sign(_b[0]) == RLC_POS); - if (RLC_WIDTH > 2) { - gt_sqr(t[0], q); - gt_mul(t[1], t[0], q); - for (size_t j = 2; j < RLC_GT_TABLE; j++) { - gt_mul(t[j], t[j - 1], t[0]); - } - } - gt_copy(t[0], q); - for (size_t i = 1; i < f; i++) { - for (size_t j = 0; j < RLC_GT_TABLE; j++) { - gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); - if (s[i] != s[i - 1]) { - gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); - } - } + for (size_t i = 0; i < d; i++) { + e[i] = bn_is_even(_b[i * 
f / d]); + bn_add_dig(_b[i * f / d], _b[i * f / d], e[i]); + } + + for (size_t i = 0; i < d; i++) { + gt_copy(t[i * s], q[i * f / d]); + for (size_t j = 1; j < s; j++) { + l = util_bits_dig(j); + gt_mul(t[i * s + j], t[i * s + (j ^ (1 << (l - 1)))], q[l + i * f / d]); } + l = RLC_FP_BITS; + bn_rec_sac(sac + i * f * RLC_FP_BITS, &l, _b + i * f / d, f / d, bn_bits(n)); } gt_set_unity(c); for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - gt_sqr(c, c); - } - - for (size_t i = 0; i < f; i++) { - n0 = reg[i * (RLC_FP_BITS + 1) + j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < RLC_GT_TABLE; m++) { - gt_copy_sec(w, t[i * RLC_GT_TABLE + m], m == n0); + gt_sqr(c, c); + for (size_t i = 0; i < d; i++) { + col = 0; + for (int k = f / d - 1; k > 0; k--) { + col <<= 1; + col += sac[i * f * RLC_FP_BITS + k * l + j]; } - - gt_inv(q, w); - gt_copy_sec(q, w, c0 == 0); - gt_mul(c, c, q); - + + for (size_t m = 0; m < s; m++) { + gt_copy_sec(q[1], t[i * s + m], m == col); + } + gt_inv(q[2], q[1]); + gt_copy_sec(q[1], q[2], sac[i * f * RLC_FP_BITS + j]); + gt_mul(c, c, q[1]); } } - for (size_t i = 0; i < f; i++) { - /* Tables are built with points already negated, so no need here. 
*/ - gt_inv(q, t[i * RLC_GT_TABLE]); - gt_mul(q, c, q); - gt_copy_sec(c, q, e[i]); + for (size_t i = 0; i < d; i++) { + gt_inv(q[1], q[i * f / d]); + gt_mul(q[1], q[1], c); + gt_copy_sec(c, q[1], e[i]); } } RLC_CATCH_ANY { @@ -406,20 +361,20 @@ void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { RLC_FINALLY { bn_free(n); bn_free(u); - gt_free(q); - gt_free(w); - for (size_t i = 0; i < f; i++) { + for (int i = 0; i < f; i++) { bn_free(_b[i]); - for (size_t j = 0; j < RLC_GT_TABLE; j++) { - gt_free(t[i * RLC_GT_TABLE + j]); + gt_free(q[i]); + } + for (size_t i = 0; i < d; i++) { + for (int j = 0; j < s; j++) { + gt_free(t[i * s + j]); } } - RLC_FREE(reg); RLC_FREE(e); - RLC_FREE(s); - RLC_FREE(t); RLC_FREE(_b); - RLC_FREE(_l); + RLC_FREE(q); + RLC_FREE(t); + RLC_FREE(sac); } } @@ -559,7 +514,9 @@ void gt_exp_sec(gt_t c, const gt_t a, const bn_t b) { } #if FP_PRIME <= 1536 - gt_exp_reg_gls(c, a, b, ep_curve_frdim()); + size_t d = ep_curve_frdim(); + d = (d > 4 ? d / 4 : 1); + gt_exp_reg_gls(c, a, b, d, ep_curve_frdim()); #else RLC_CAT(RLC_GT_LOWER, exp_monty)(c, a, b); #endif diff --git a/src/pp/relic_pp_map_k54.c b/src/pp/relic_pp_map_k54.c index 17b2b0181..5860fa94a 100644 --- a/src/pp/relic_pp_map_k54.c +++ b/src/pp/relic_pp_map_k54.c @@ -181,7 +181,6 @@ void pp_map_k54(fp54_t r, const ep_t p, const fp9_t qx, const fp9_t qy) { if (bn_sign(a) == RLC_NEG) { fp54_inv_cyc(r, r); } - fp18_print(r[0]); pp_exp_k54(r, r); break; } From a06369fd8e22a11c78367f4ec9d4981cdce5379c Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Wed, 14 Aug 2024 14:30:31 +0200 Subject: [PATCH 16/18] Fix choice of algo for supersingular curves. 
--- src/pc/relic_pc_exp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 33307a7f7..0c03222af 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -513,7 +513,7 @@ void gt_exp_sec(gt_t c, const gt_t a, const bn_t b) { return; } -#if FP_PRIME <= 1536 +#if FP_PRIME < 1536 size_t d = ep_curve_frdim(); d = (d > 4 ? d / 4 : 1); gt_exp_reg_gls(c, a, b, d, ep_curve_frdim()); From c82979ccee9c5fc2358e830b9daad02d8018f488 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Wed, 14 Aug 2024 14:33:53 +0200 Subject: [PATCH 17/18] Fix again. --- src/pc/relic_pc_exp.c | 135 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 0c03222af..55a08ca30 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -255,6 +255,8 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { } } +#if FP_PRIME < 1536 + /** * Exponentiates an element from G_T in constant time. * @@ -378,6 +380,137 @@ static void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t d, } } +#else + +/** + * Exponentiates an element from G_T in constant time. + * + * @param[out] c - the result. + * @param[in] a - the element to exponentiate. + * @param[in] b - the exponent. + * @param[in] f - the maximum Frobenius power. 
+ */ +void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { + int8_t *naf = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); + int8_t n0, *s = RLC_ALLOCA(int8_t, f); + gt_t q, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); + bn_t n, u, *_b = RLC_ALLOCA(bn_t, f); + size_t l, *_l = RLC_ALLOCA(size_t, f); + + if (naf == NULL || t == NULL || _b == NULL || _l == NULL) { + RLC_THROW(ERR_NO_MEMORY); + return; + } + + if (bn_is_zero(b)) { + RLC_FREE(naf); + RLC_FREE(s); + RLC_FREE(t); + RLC_FREE(_b); + RLC_FREE(_l); + return gt_set_unity(c); + } + + bn_null(n); + bn_null(u); + gt_null(q); + + RLC_TRY { + bn_new(n); + bn_new(u); + gt_new(q); + for (size_t i = 0; i < f; i++) { + bn_null(_b[i]); + bn_new(_b[i]); + for (size_t j = 0; j < RLC_GT_TABLE; j++) { + gt_null(t[i * RLC_GT_TABLE + j]); + gt_new(t[i * RLC_GT_TABLE + j]); + } + } + + fp_prime_get_par(u); + if (ep_curve_is_pairf() == EP_SG18) { + /* Compute base -3*u for the recoding below. */ + bn_dbl(n, u); + bn_add(u, u, n); + bn_neg(u, u); + } + gt_get_ord(n); + bn_abs(_b[0], b); + bn_mod(_b[0], _b[0], n); + if (bn_sign(b) == RLC_NEG) { + bn_neg(_b[0], _b[0]); + } + bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); + + l = 0; + gt_copy(t[0], a); + for (size_t i = 0; i < f; i++) { + s[i] = bn_sign(_b[i]); + bn_abs(_b[i], _b[i]); + + _l[i] = RLC_FP_BITS + 1; + bn_rec_naf(naf + i * (RLC_FP_BITS + 1), &_l[i], _b[i], RLC_WIDTH); + l = RLC_MAX(l, _l[i]); + /* Apply Frobenius before flipping sign to build table. 
*/ + if (i > 0) { + gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); + } + } + + for (size_t i = 0; i < f; i++) { + gt_inv(q, t[i * RLC_GT_TABLE]); + gt_copy_sec(q, t[i * RLC_GT_TABLE], s[i] == RLC_POS); + if (RLC_WIDTH > 2) { + gt_sqr(t[i * RLC_GT_TABLE], q); + gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], + t[i * (RLC_GT_TABLE)]); + } + } + gt_copy(t[i * RLC_GT_TABLE], q); + } + + gt_set_unity(c); + for (int j = l - 1; j >= 0; j--) { + gt_sqr(c, c); + + for (size_t i = 0; i < f; i++) { + n0 = naf[i * (RLC_FP_BITS + 1) + j]; + if (n0 > 0) { + gt_mul(c, c, t[i * RLC_GT_TABLE + n0 / 2]); + } + if (n0 < 0) { + gt_inv(q, t[i * RLC_GT_TABLE - n0 / 2]); + gt_mul(c, c, q); + } + } + } + } + RLC_CATCH_ANY { + RLC_THROW(ERR_CAUGHT); + } + RLC_FINALLY { + bn_free(n); + bn_free(u); + gt_free(q); + for (size_t i = 0; i < f; i++) { + bn_free(_b[i]); + for (size_t j = 0; j < RLC_GT_TABLE; j++) { + gt_free(t[i * RLC_GT_TABLE + j]); + } + } + RLC_FREE(naf); + RLC_FREE(s); + RLC_FREE(t); + RLC_FREE(_b); + RLC_FREE(_l); + } +} + +#endif + /*============================================================================*/ /* Public definitions */ /*============================================================================*/ @@ -513,7 +646,7 @@ void gt_exp_sec(gt_t c, const gt_t a, const bn_t b) { return; } -#if FP_PRIME < 1536 +#if FP_PRIME <= 1536 size_t d = ep_curve_frdim(); d = (d > 4 ? d / 4 : 1); gt_exp_reg_gls(c, a, b, d, ep_curve_frdim()); From 9b88c8bdfbb4361bd703d32a0bb057828ac4b6a6 Mon Sep 17 00:00:00 2001 From: "Diego F. Aranha" Date: Wed, 14 Aug 2024 14:37:08 +0200 Subject: [PATCH 18/18] Trying again. 
--- src/pc/relic_pc_exp.c | 59 ++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 55a08ca30..daa5d517d 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -390,20 +390,21 @@ static void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t d, * @param[in] b - the exponent. * @param[in] f - the maximum Frobenius power. */ -void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { - int8_t *naf = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); - int8_t n0, *s = RLC_ALLOCA(int8_t, f); - gt_t q, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); +void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { + int8_t c0, n0, *reg = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); + int8_t *e = RLC_ALLOCA(int8_t, f), *s = RLC_ALLOCA(int8_t, f); + gt_t q, w, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); bn_t n, u, *_b = RLC_ALLOCA(bn_t, f); - size_t l, *_l = RLC_ALLOCA(size_t, f); + size_t l, len, *_l = RLC_ALLOCA(size_t, f); - if (naf == NULL || t == NULL || _b == NULL || _l == NULL) { + if (reg == NULL || e == NULL || t == NULL || _b == NULL || _l == NULL) { RLC_THROW(ERR_NO_MEMORY); return; } if (bn_is_zero(b)) { - RLC_FREE(naf); + RLC_FREE(reg); + RLC_FREE(e); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -414,11 +415,13 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_null(n); bn_null(u); gt_null(q); + gt_null(w); RLC_TRY { bn_new(n); bn_new(u); gt_new(q); + gt_new(w); for (size_t i = 0; i < f; i++) { bn_null(_b[i]); bn_new(_b[i]); @@ -444,13 +447,16 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); l = 0; + len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); gt_copy(t[0], a); for (size_t i = 0; i < f; i++) { s[i] = bn_sign(_b[i]); bn_abs(_b[i], _b[i]); + e[i] = bn_is_even(_b[i]); + _b[i]->dp[0] |= e[i]; _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf + i * 
(RLC_FP_BITS + 1), &_l[i], _b[i], RLC_WIDTH); + bn_rec_reg(reg + i * (RLC_FP_BITS + 1), &_l[i], _b[i], len, RLC_WIDTH); l = RLC_MAX(l, _l[i]); /* Apply Frobenius before flipping sign to build table. */ if (i > 0) { @@ -474,19 +480,32 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { gt_set_unity(c); for (int j = l - 1; j >= 0; j--) { - gt_sqr(c, c); + for (size_t i = 0; i < RLC_WIDTH - 1; i++) { + gt_sqr(c, c); + } for (size_t i = 0; i < f; i++) { - n0 = naf[i * (RLC_FP_BITS + 1) + j]; - if (n0 > 0) { - gt_mul(c, c, t[i * RLC_GT_TABLE + n0 / 2]); - } - if (n0 < 0) { - gt_inv(q, t[i * RLC_GT_TABLE - n0 / 2]); - gt_mul(c, c, q); + n0 = reg[i * (RLC_FP_BITS + 1) + j]; + c0 = (n0 >> 7); + n0 = ((n0 ^ c0) - c0) >> 1; + + for (size_t m = 0; m < RLC_GT_TABLE; m++) { + gt_copy_sec(w, t[i * RLC_GT_TABLE + m], m == n0); } + + gt_inv(q, w); + gt_copy_sec(q, w, c0 == 0); + gt_mul(c, c, q); + } } + + for (size_t i = 0; i < f; i++) { + /* Tables are built with points already negated, so no need here. */ + gt_inv(q, t[i * RLC_GT_TABLE]); + gt_mul(q, c, q); + gt_copy_sec(c, q, e[i]); + } } RLC_CATCH_ANY { RLC_THROW(ERR_CAUGHT); @@ -495,13 +514,15 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_free(n); bn_free(u); gt_free(q); + gt_free(w); for (size_t i = 0; i < f; i++) { bn_free(_b[i]); for (size_t j = 0; j < RLC_GT_TABLE; j++) { gt_free(t[i * RLC_GT_TABLE + j]); } } - RLC_FREE(naf); + RLC_FREE(reg); + RLC_FREE(e); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -646,10 +667,12 @@ void gt_exp_sec(gt_t c, const gt_t a, const bn_t b) { return; } -#if FP_PRIME <= 1536 +#if FP_PRIME < 1536 size_t d = ep_curve_frdim(); d = (d > 4 ? d / 4 : 1); gt_exp_reg_gls(c, a, b, d, ep_curve_frdim()); +#elif FP_PRIME == 1536 + gt_exp_reg_gls(c, a, b, 1); #else RLC_CAT(RLC_GT_LOWER, exp_monty)(c, a, b); #endif