diff --git a/bench/bench_pc.c b/bench/bench_pc.c index bb040c6f4..0ac430d47 100755 --- a/bench/bench_pc.c +++ b/bench/bench_pc.c @@ -684,6 +684,12 @@ static void arith(void) { } BENCH_END; + BENCH_RUN("gt_frb (1)") { + gt_rand(a); + BENCH_ADD(gt_frb(c, a, 1)); + } + BENCH_END; + BENCH_RUN("gt_exp") { gt_rand(a); pc_get_ord(d); diff --git a/include/relic_bn.h b/include/relic_bn.h index bbfa6e740..a4fb0e278 100644 --- a/include/relic_bn.h +++ b/include/relic_bn.h @@ -1547,11 +1547,11 @@ void bn_rec_frb(bn_t *ki, int sub, const bn_t k, const bn_t x, const bn_t n, * @param[out] b - the recoded subscalars. * @param[in] len - the length in bytes of the recoding. * @param[in] k - the subscalars to recode. - * @param[in] m - the number of subscallars to recode. - * @param[in] n - the elliptic curve group order. + * @param[in] m - the number of subscalars to recode. + * @param[in] n - the bit length of the group order. * @throw ERR_NO_BUFFER - if the buffer capacity is insufficient. */ -void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, bn_t n); +void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, size_t n); /** * Computes the coefficients of the polynomial representing the Lagrange diff --git a/src/bn/relic_bn_rec.c b/src/bn/relic_bn_rec.c index 2f8b2c210..2988a4092 100644 --- a/src/bn/relic_bn_rec.c +++ b/src/bn/relic_bn_rec.c @@ -876,10 +876,10 @@ void bn_rec_glv(bn_t k0, bn_t k1, const bn_t k, const bn_t n, const bn_t *v1, } } -void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, bn_t n) { +void bn_rec_sac(int8_t *b, size_t *len, bn_t *k, size_t m, size_t n) { /* Assume k0 is the sign-aligner. */ bn_t *t = RLC_ALLOCA(bn_t, m); - size_t l = RLC_CEIL(bn_bits(n), m) + 1; + size_t l = RLC_CEIL(n, m) + 1; int8_t bji; if (t == NULL) { diff --git a/src/ep/relic_ep_mul.c b/src/ep/relic_ep_mul.c index c79cf404d..5e554cfc5 100644 --- a/src/ep/relic_ep_mul.c +++ b/src/ep/relic_ep_mul.c @@ -77,8 +77,6 @@ static void ep_mul_glv_imp(ep_t r, const ep_t p, const bn_t k) { bn_rec_glv(k0, k1, _k, n, (const bn_t *)v1, (const bn_t *)v2); s0 = bn_sign(k0); s1 = bn_sign(k1); - bn_abs(k0, k0); - bn_abs(k1, k1); if (s0 == RLC_POS) { ep_tab(t, p, RLC_WIDTH); diff --git a/src/epx/relic_ep2_mul.c b/src/epx/relic_ep2_mul.c index 2e2386cbf..7da80588f 100644 --- a/src/epx/relic_ep2_mul.c +++ b/src/epx/relic_ep2_mul.c @@ -44,19 +44,23 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { size_t l, _l[4]; bn_t n, _k[4], u; int8_t naf[4][RLC_FP_BITS + 1]; - ep2_t q[4]; + ep2_t q, t[4][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep2_null(q); RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 4; i++) { + ep2_new(q); + for (size_t i = 0; i < 4; i++) { bn_null(_k[i]); - ep2_null(q[i]); bn_new(_k[i]); - ep2_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep2_null(t[i][j]); + ep2_new(t[i][j]); + } } ep2_curve_get_ord(n); @@ -64,31 +68,37 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 4, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep2_norm(q[0], p); - ep2_frb(q[1], q[0], 1); - ep2_frb(q[2], q[1], 1); - ep2_frb(q[3], q[2], 1); - l = 0; - for (int i = 0; i < 4; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep2_neg(q[i], q[i]); - } + for (size_t i = 0; i < 4; i++) { _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); + if (i == 0) { + ep2_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep2_neg(q, q); + } + ep2_tab(t[0], q, RLC_WIDTH); + } else { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep2_frb(t[i][j], t[i - 1][j], 1); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep2_neg(t[i][j], t[i][j]); + } + } + } } ep2_set_infty(r); for (int j = l - 1; j >= 0; j--) { ep2_dbl(r, r); - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { if (naf[i][j] > 0) { - ep2_add(r, r, q[i]); + ep2_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep2_sub(r, r, q[i]); + ep2_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -102,11 +112,13 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 4; i++) { + ep2_free(q); + for (size_t i = 0; i < 4; i++) { bn_free(_k[i]); - ep2_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep2_free(t[i][j]); + } } - } } @@ -117,7 +129,7 @@ static void ep2_mul_gls_imp(ep2_t r, const ep2_t p, const bn_t k) { static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) { size_t l; bn_t n, _k[4], u; - int8_t even, col, sac[4 * (RLC_FP_BITS + 1)]; + int8_t even, col, sac[4 * RLC_FP_BITS]; ep2_t q[4], t[1 << 3]; bn_null(n); @@ -159,8 +171,8 @@ static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) { ep2_add(t[i], t[i ^ (1 << (l - 1))], q[l]); } - l = RLC_FP_BITS + 1; - bn_rec_sac(sac, &l, _k, 4, n); + l = RLC_FP_BITS; + bn_rec_sac(sac, &l, _k, 4, bn_bits(n)); #if defined(EP_MIXED) ep2_norm_sim(t + 1, t + 1, (1 << 3) - 1); @@ -185,7 +197,7 @@ static void ep2_mul_reg_gls(ep2_t r, const ep2_t p, const bn_t k) { } ep2_neg(q[1], r); - fp2_copy_sec(r->y, q[1]->y, sac[l - 1] != 0); + fp2_copy_sec(r->y, q[1]->y, sac[l - 1]); for (int j = l - 2; j >= 0; j--) { ep2_dbl(r, r); diff --git a/src/epx/relic_ep2_mul_sim.c b/src/epx/relic_ep2_mul_sim.c index c39c432f6..d163c42bd 100644 --- a/src/epx/relic_ep2_mul_sim.c +++ b/src/epx/relic_ep2_mul_sim.c @@ -280,9 +280,7 @@ void ep2_mul_sim_basic(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, const bn_t m) { - ep2_t t0[1 << (RLC_WIDTH / 2)]; - ep2_t t1[1 << (RLC_WIDTH / 2)]; - ep2_t t[1 << RLC_WIDTH]; + ep2_t t0[1 << (RLC_WIDTH / 2)], t1[1 << (RLC_WIDTH / 2)], t[1 << RLC_WIDTH]; bn_t n, _k, _m; size_t l0, l1, w = RLC_WIDTH / 2; uint8_t w0[2 * RLC_FP_BITS], w1[2 * RLC_FP_BITS]; @@ -305,10 +303,6 @@ void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, bn_new(_k); bn_new(_m); - ep2_curve_get_ord(n); - bn_mod(_k, k, n); - bn_mod(_m, m, n); - for (int i = 0; i < (1 << w); i++) { ep2_null(t0[i]); ep2_null(t1[i]); @@ -320,21 +314,19 @@ void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, ep2_new(t[i]); } + ep2_curve_get_ord(n); + bn_mod(_k, k, n); + bn_mod(_m, m, n); + ep2_set_infty(t0[0]); ep2_copy(t0[1], p); - if (bn_sign(k) == RLC_NEG) { - ep2_neg(t0[1], t0[1]); - } for (int i = 2; i < (1 << w); i++) { ep2_add(t0[i], t0[i - 1], t0[1]); } ep2_set_infty(t1[0]); ep2_copy(t1[1], q); - if (bn_sign(m) == RLC_NEG) { - ep2_neg(t1[1], t1[1]); - } - for (int i = 1; i < (1 << w); i++) { + for (int i = 2; i < (1 << w); i++) { ep2_add(t1[i], t1[i - 1], t1[1]); } @@ -345,12 +337,12 @@ void ep2_mul_sim_trick(ep2_t r, const ep2_t p, const bn_t k, const ep2_t q, } #if defined(EP_MIXED) - ep2_norm_sim(t + 1, t + 1, (1 << (RLC_WIDTH)) - 1); + ep2_norm_sim(t + 2, (const ep2_t *)(t + 2), (1 << (w + w)) - 2); #endif l0 = l1 = RLC_CEIL(2 * RLC_FP_BITS, w); - bn_rec_win(w0, &l0, k, w); - bn_rec_win(w1, &l1, m, w); + bn_rec_win(w0, &l0, _k, w); + bn_rec_win(w1, &l1, _m, w); ep2_set_infty(r); for (int i = RLC_MAX(l0, l1) - 1; i >= 0; i--) { diff --git a/src/epx/relic_ep3_mul.c b/src/epx/relic_ep3_mul.c index a814664fb..b09ca701f 100644 --- a/src/epx/relic_ep3_mul.c +++ b/src/epx/relic_ep3_mul.c @@ -85,12 +85,11 @@ static void ep3_psi(ep3_t r, const ep3_t p) { #if EP_MUL == LWNAF || !defined(STRIP) -static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { - int i, j; - size_t l, _l[6]; +static void ep3_mul_gls_imp(ep3_t r, const ep3_t p, const bn_t k) { + size_t l; bn_t n, _k[6], u; - int8_t naf[6][RLC_FP_BITS + 1]; - ep3_t q[6]; + int8_t even, col, sac[6 * (RLC_FP_BITS + 1)]; + ep3_t q[6], t[1 << 5]; bn_null(n); bn_null(u); @@ -98,13 +97,18 @@ static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { RLC_TRY { bn_new(n); bn_new(u); - for (i = 0; i < 6; i++) { + for (int i = 0; i < 6; i++) { bn_null(_k[i]); ep3_null(q[i]); bn_new(_k[i]); ep3_new(q[i]); } + for (int i = 0; i < (1 << 5); i++) { + ep3_null(t[i]); + ep3_new(t[i]); + } + ep3_curve_get_ord(n); fp_prime_get_par(u); if (ep_curve_is_pairf() == EP_SG18) { /* Compute base -3*u for the recoding below. */ @@ -112,40 +116,58 @@ static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { bn_add(u, u, n); bn_neg(u, u); } - ep3_curve_get_ord(n); bn_mod(_k[0], k, n); bn_rec_frb(_k, 6, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - + even = bn_is_even(_k[0]); + bn_add_dig(_k[0], _k[0], even); ep3_norm(q[0], p); - for (int i = 1; i < 6; i++) { + for (size_t i = 1; i < 6; i++) { ep3_psi(q[i], q[i - 1]); } -#if defined(EP_MIXED) - ep3_norm_sim(q + 1, q + 1, 5); -#endif - - l = 0; - for (i = 0; i < 6; i++) { + for (size_t i = 0; i < 6; i++) { if (bn_sign(_k[i]) == RLC_NEG) { ep3_neg(q[i], q[i]); } - _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); - l = RLC_MAX(l, _l[i]); + bn_abs(_k[i], _k[i]); + } + + ep3_copy(t[0], q[0]); + for (size_t i = 1; i < (1 << 5); i++) { + l = util_bits_dig(i); + ep3_add(t[i], t[i ^ (1 << (l - 1))], q[l]); } + l = RLC_FP_BITS + 1; + bn_rec_sac(sac, &l, _k, 6, bn_bits(n)); + +#if defined(EP_MIXED) + ep3_norm_sim(t + 1, t + 1, (1 << 5) - 1); + fp3_set_dig(r->z, 1); + fp3_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; +#else + r->coord = q[1]->coord = EP_ADD; +#endif + ep3_set_infty(r); - for (j = l - 1; j >= 0; j--) { + for (int j = l - 1; j >= 0; j--) { ep3_dbl(r, r); - for (i = 0; i < 6; i++) { - if (naf[i][j] > 0) { - ep3_add(r, r, q[i]); - } - if (naf[i][j] < 0) { - ep3_sub(r, r, q[i]); - } + col = 0; + for (int i = 5; i > 0; i--) { + col <<= 1; + col += sac[i * l + j]; } + + if (sac[j]) { + ep3_sub(r, r, t[col]); + } else { + ep3_add(r, r, t[col]); + } + } + + if (even) { + ep3_sub(r, r, q[0]); } /* Convert r to affine coordinates. */ @@ -157,11 +179,13 @@ static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (i = 0; i < 3; i++) { + for (int i = 0; i < 6; i++) { bn_free(_k[i]); ep3_free(q[i]); } - + for (int i = 0; i < (1 << 5); i++) { + ep3_free(t[i]); + } } } @@ -170,100 +194,108 @@ static void ep3_mul_glv_imp(ep3_t r, const ep3_t p, const bn_t k) { #if EP_MUL == LWREG || !defined(STRIP) static void ep3_mul_reg_gls(ep3_t r, const ep3_t p, const bn_t k) { - int8_t reg[6][RLC_FP_BITS + 1], b[6], s[6], c0, n0; - ep3_t q, w, t[6][1 << (RLC_WIDTH - 2)]; + size_t l; bn_t n, _k[6], u; - size_t l, len, _l[6]; + int8_t even, col, sac[6 * (RLC_FP_BITS + 1)]; + ep3_t q[6], t[1 << 5]; bn_null(n); bn_null(u); - ep3_null(q); - ep3_null(w); RLC_TRY { bn_new(n); bn_new(u); - ep3_new(q); - ep3_new(w); - for (size_t i = 0; i < 6; i++) { + for (int i = 0; i < 6; i++) { bn_null(_k[i]); + ep3_null(q[i]); bn_new(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { - ep3_null(t[i][j]); - ep3_new(t[i][j]); - } + ep3_new(q[i]); + } + for (int i = 0; i < (1 << 5); i++) { + ep3_null(t[i]); + ep3_new(t[i]); } ep3_curve_get_ord(n); fp_prime_get_par(u); + if (ep_curve_is_pairf() == EP_SG18) { + /* Compute base -3*u for the recoding below. */ + bn_dbl(n, u); + bn_add(u, u, n); + bn_neg(u, u); + } bn_mod(_k[0], k, n); bn_rec_frb(_k, 6, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - - l = 0; - /* Make some extra room for BN curves that grow subscalars by 1. */ - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); - ep3_norm(t[0][0], p); + even = bn_is_even(_k[0]); + bn_add_dig(_k[0], _k[0], even); + ep3_norm(q[0], p); + for (size_t i = 1; i < 6; i++) { + ep3_psi(q[i], q[i - 1]); + } for (size_t i = 0; i < 6; i++) { - s[i] = bn_sign(_k[i]); + ep3_neg(r, q[i]); + fp3_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); bn_abs(_k[i], _k[i]); - b[i] = bn_is_even(_k[i]); - _k[i]->dp[0] |= b[i]; - - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg[i], &_l[i], _k[i], len, RLC_WIDTH); - l = RLC_MAX(l, _l[i]); - - /* Apply Frobenius before flipping sign to build table. */ - if (i > 0) { - ep3_psi(t[i][0], t[i - 1][0]); - } } - for (size_t i = 0; i < 6; i++) { - ep3_neg(q, t[i][0]); - fp3_copy_sec(q->y, t[i][0]->y, s[i] == RLC_POS); - ep3_tab(t[i], q, RLC_WIDTH); + ep3_copy(t[0], q[0]); + for (size_t i = 1; i < (1 << 5); i++) { + l = util_bits_dig(i); + ep3_add(t[i], t[i ^ (1 << (l - 1))], q[l]); } + l = RLC_FP_BITS + 1; + bn_rec_sac(sac, &l, _k, 6, bn_bits(n)); + #if defined(EP_MIXED) - fp3_set_dig(w->z, 1); - w->coord = BASIC; + ep3_norm_sim(t + 1, t + 1, (1 << 5) - 1); + fp3_set_dig(r->z, 1); + fp3_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; #else - w->coord = = EP_ADD; + r->coord = q[1]->coord = EP_ADD; #endif - ep3_set_infty(r); - for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - ep3_dbl(r, r); - } + col = 0; + for (int i = 5; i > 0; i--) { + col <<= 1; + col += sac[i * l + l - 1]; + } + for (size_t m = 0; m < (1 << 5); m++) { + fp3_copy_sec(r->x, t[m]->x, m == col); + fp3_copy_sec(r->y, t[m]->y, m == col); +#if !defined(EP_MIXED) + fp3_copy_sec(r->z, t[m]->z, m == col); +#endif + } - for (size_t i = 0; i < 6; i++) { - n0 = reg[i][j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < (1 << (RLC_WIDTH - 2)); m++) { - fp3_copy_sec(w->x, t[i][m]->x, m == n0); - fp3_copy_sec(w->y, t[i][m]->y, m == n0); - #if !defined(EP_MIXED) - fp3_copy_sec(w->z, t[i][m]->z, m == n0); - #endif - } + ep3_neg(q[1], r); + fp3_copy_sec(r->y, q[1]->y, sac[l - 1] != 0); + for (int j = l - 2; j >= 0; j--) { + ep3_dbl(r, r); - ep3_neg(q, w); - fp3_copy_sec(q->y, w->y, c0 == 0); - ep3_add(r, r, q); + col = 0; + for (int i = 5; i > 0; i--) { + col <<= 1; + col += sac[i * l + j]; + } + + for (size_t m = 0; m < (1 << 5); m++) { + fp3_copy_sec(q[1]->x, t[m]->x, m == col); + fp3_copy_sec(q[1]->y, t[m]->y, m == col); +#if !defined(EP_MIXED) + fp3_copy_sec(q[1]->z, t[m]->z, m == col); +#endif } + ep3_neg(q[2], q[1]); + fp3_copy_sec(q[1]->y, q[2]->y, sac[j]); + ep3_add(r, r, q[1]); } - for (size_t i = 0; i < 6; i++) { - /* Tables are built with points already negated, so no need here. */ - ep3_sub(q, r, t[i][0]); - fp3_copy_sec(r->x, q->x, b[i]); - fp3_copy_sec(r->y, q->y, b[i]); - fp3_copy_sec(r->z, q->z, b[i]); - } + ep3_sub(q[1], r, q[0]); + fp3_copy_sec(r->x, q[1]->x, even); + fp3_copy_sec(r->y, q[1]->y, even); + fp3_copy_sec(r->z, q[1]->z, even); /* Convert r to affine coordinates. */ ep3_norm(r, r); @@ -274,13 +306,12 @@ static void ep3_mul_reg_gls(ep3_t r, const ep3_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - ep3_free(q); - ep3_free(w); for (int i = 0; i < 6; i++) { bn_free(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { - ep3_free(t[i][j]); - } + ep3_free(q[i]); + } + for (int i = 0; i < (1 << 5); i++) { + ep3_free(t[i]); } } } @@ -652,7 +683,7 @@ void ep3_mul_lwnaf(ep3_t r, const ep3_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep3_mul_glv_imp(r, p, k); + ep3_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/epx/relic_ep4_mul.c b/src/epx/relic_ep4_mul.c index a6cb8a5ba..de4d207d6 100644 --- a/src/epx/relic_ep4_mul.c +++ b/src/epx/relic_ep4_mul.c @@ -82,23 +82,27 @@ static void ep4_psi(ep4_t r, const ep4_t p) { #if EP_MUL == LWNAF || !defined(STRIP) -static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { +static void ep4_mul_gls_imp(ep4_t r, const ep4_t p, const bn_t k) { size_t l, _l[8]; bn_t n, _k[8], u; int8_t naf[8][RLC_FP_BITS + 1]; - ep4_t q[8]; + ep4_t q, t[8][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep4_null(q); RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 8; i++) { + ep4_new(q); + for (size_t i = 0; i < 8; i++) { bn_null(_k[i]); - ep4_null(q[i]); bn_new(_k[i]); - ep4_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_null(t[i][j]); + ep4_new(t[i][j]); + } } ep4_curve_get_ord(n); @@ -106,34 +110,47 @@ static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 8, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep4_norm(q[0], p); - for (size_t i = 1; i < 8; i++) { - ep4_psi(q[i], q[i - 1]); - } -#if defined(EP_MIXED) - ep4_norm_sim(q + 1, q + 1, 7); -#endif - l = 0; for (size_t i = 0; i < 8; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep4_neg(q[i], q[i]); - } _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); } + ep4_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep4_neg(q, q); + } + ep4_tab(t[0], q, RLC_WIDTH); + + if (ep_curve_is_pairf() == EP_K16) { + for (size_t i = 1; i < 8; i++) { + ep4_psi(q, t[i - 1][0]); + if (bn_sign(_k[i]) == RLC_NEG) { + ep4_neg(q, q); + } + ep4_tab(t[i], q, RLC_WIDTH); + } + } else { + for (size_t i = 1; i < 8; i++) { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_psi(t[i][j], t[i - 1][j]); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep4_neg(t[i][j], t[i][j]); + } + } + } + } ep4_set_infty(r); for (int j = l - 1; j >= 0; j--) { ep4_dbl(r, r); - for (int i = 0; i < 8; i++) { + for (size_t i = 0; i < 8; i++) { if (naf[i][j] > 0) { - ep4_add(r, r, q[i]); + ep4_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep4_sub(r, r, q[i]); + ep4_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -147,11 +164,13 @@ static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 8; i++) { + ep4_free(q); + for (size_t i = 0; i < 8; i++) { bn_free(_k[i]); - ep4_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_free(t[i][j]); + } } - } } @@ -160,25 +179,25 @@ static void ep4_mul_glv_imp(ep4_t r, const ep4_t p, const bn_t k) { #if EP_MUL == LWREG || !defined(STRIP) static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { - int8_t reg[8][RLC_FP_BITS + 1], b[8], s[8], c0, n0; - ep4_t q, w, t[8][1 << (RLC_WIDTH - 2)]; + size_t l, c = 2, m = 8; bn_t n, _k[8], u; - size_t l, len, _l[8]; + int8_t even[2], col, sac[2][4 * (RLC_FP_BITS + 1)]; + ep4_t q[8], t[2][1 << 3]; bn_null(n); bn_null(u); - ep4_null(q); - ep4_null(w); RLC_TRY { bn_new(n); bn_new(u); - ep4_new(q); - ep4_new(w); - for (size_t i = 0; i < 8; i++) { + for (int i = 0; i < 8; i++) { bn_null(_k[i]); + ep4_null(q[i]); bn_new(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_new(q[i]); + } + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (1 << 3); j++) { ep4_null(t[i][j]); ep4_new(t[i][j]); } @@ -189,70 +208,88 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 8, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - l = 0; - /* Make some extra room for BN curves that grow subscalars by 1. */ - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); - ep4_norm(t[0][0], p); + ep4_norm(q[0], p); + for (size_t i = 1; i < 8; i++) { + ep4_psi(q[i], q[i - 1]); + } for (size_t i = 0; i < 8; i++) { - s[i] = bn_sign(_k[i]); + ep4_neg(r, q[i]); + fp4_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); bn_abs(_k[i], _k[i]); - b[i] = bn_is_even(_k[i]); - _k[i]->dp[0] |= b[i]; - - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg[i], &_l[i], _k[i], len, RLC_WIDTH); - l = RLC_MAX(l, _l[i]); - - /* Apply Frobenius before flipping sign to build table. */ - if (i > 0) { - ep4_psi(t[i][0], t[i - 1][0]); - } } - - for (size_t i = 0; i < 8; i++) { - ep4_neg(q, t[i][0]); - fp4_copy_sec(q->y, t[i][0]->y, s[i] == RLC_POS); - ep4_tab(t[i], q, RLC_WIDTH); + for (size_t i = 0; i < c; i++) { + even[i] = bn_is_even(_k[i * m / c]); + bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); + } + + for (size_t i = 0; i < c; i++) { + ep4_copy(t[i][0], q[i * m / c]); + for (size_t j = 1; j < (1 << 3); j++) { + l = util_bits_dig(j); + ep4_add(t[i][j], t[i][j ^ (1 << (l - 1))], q[l + i * m / c]); + } + l = RLC_FP_BITS + 1; + bn_rec_sac(sac[i], &l, _k + i * m / c, m / c, bn_bits(n)); } #if defined(EP_MIXED) - fp4_set_dig(w->z, 1); - w->coord = BASIC; + for (size_t i = 0; i < c; i++) { + ep4_norm_sim(t[i] + 1, t[i] + 1, (1 << 3) - 1); + } + fp4_set_dig(r->z, 1); + fp4_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; #else - w->coord = = EP_ADD; + r->coord = q[1]->coord = EP_ADD; #endif ep4_set_infty(r); - for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - ep4_dbl(r, r); + for (size_t i = 0; i < c; i++) { + col = 0; + for (int j = 3; j > 0; j--) { + col <<= 1; + col += sac[i][j * l + l - 1]; } + for (size_t m = 0; m < (1 << 3); m++) { + fp4_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp4_copy_sec(q[1]->y, t[i][m]->y, m == col); +#if !defined(EP_MIXED) + fp4_copy_sec(q[1]->z, t[i][m]->z, m == col); +#endif + } + ep4_neg(q[2], q[1]); + fp4_copy_sec(q[1]->y, q[2]->y, sac[i][l - 1]); + ep4_add(r, r, q[1]); + } - for (size_t i = 0; i < 8; i++) { - n0 = reg[i][j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < (1 << (RLC_WIDTH - 2)); m++) { - fp4_copy_sec(w->x, t[i][m]->x, m == n0); - fp4_copy_sec(w->y, t[i][m]->y, m == n0); - #if !defined(EP_MIXED) - fp4_copy_sec(w->z, t[i][m]->z, m == n0); - #endif - } + for (int j = l - 2; j >= 0; j--) { + ep4_dbl(r, r); - ep4_neg(q, w); - fp4_copy_sec(q->y, w->y, c0 == 0); - ep4_add(r, r, q); + for (size_t i = 0; i < c; i++) { + col = 0; + for (int k = 3; k > 0; k--) { + col <<= 1; + col += sac[i][k * l + j]; + } + + for (size_t m = 0; m < (1 << 3); m++) { + fp4_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp4_copy_sec(q[1]->y, t[i][m]->y, m == col); +#if !defined(EP_MIXED) + fp4_copy_sec(q[1]->z, t[i][m]->z, m == col); +#endif + } + ep4_neg(q[2], q[1]); + fp4_copy_sec(q[1]->y, q[2]->y, sac[i][j]); + ep4_add(r, r, q[1]); } } - for (size_t i = 0; i < 8; i++) { - /* Tables are built with points already negated, so no need here. */ - ep4_sub(q, r, t[i][0]); - fp4_copy_sec(r->x, q->x, b[i]); - fp4_copy_sec(r->y, q->y, b[i]); - fp4_copy_sec(r->z, q->z, b[i]); + for (size_t i = 0; i < c; i++) { + ep4_sub(q[1], r, q[i * m / c]); + fp4_copy_sec(r->x, q[1]->x, even[i]); + fp4_copy_sec(r->y, q[1]->y, even[i]); + fp4_copy_sec(r->z, q[1]->z, even[i]); } /* Convert r to affine coordinates. */ @@ -264,11 +301,12 @@ static void ep4_mul_reg_gls(ep4_t r, const ep4_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - ep4_free(q); - ep4_free(w); - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 8; i++) { bn_free(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep4_free(q[i]); + } + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (1 << 3); j++) { ep4_free(t[i][j]); } } @@ -647,7 +685,7 @@ void ep4_mul_lwnaf(ep4_t r, const ep4_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep4_mul_glv_imp(r, p, k); + ep4_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/epx/relic_ep8_mul.c b/src/epx/relic_ep8_mul.c index 5300e8933..6ca09592e 100644 --- a/src/epx/relic_ep8_mul.c +++ b/src/epx/relic_ep8_mul.c @@ -40,23 +40,27 @@ #if EP_MUL == LWNAF || !defined(STRIP) -static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { +static void ep8_mul_gls_imp(ep8_t r, const ep8_t p, const bn_t k) { size_t l, _l[16]; bn_t n, _k[16], u; int8_t naf[16][RLC_FP_BITS + 1]; - ep8_t q[16]; + ep8_t q, t[16][1 << (RLC_WIDTH - 2)]; bn_null(n); bn_null(u); + ep8_null(q); RLC_TRY { bn_new(n); bn_new(u); - for (int i = 0; i < 16; i++) { + ep8_new(q); + for (size_t i = 0; i < 16; i++) { bn_null(_k[i]); - ep8_null(q[i]); bn_new(_k[i]); - ep8_new(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_null(t[i][j]); + ep8_new(t[i][j]); + } } ep8_curve_get_ord(n); @@ -64,31 +68,37 @@ static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 16, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - ep8_norm(q[0], p); - for (size_t i = 1; i < 16; i++) { - ep8_frb(q[i], q[i - 1], 1); - } - l = 0; for (size_t i = 0; i < 16; i++) { - if (bn_sign(_k[i]) == RLC_NEG) { - ep8_neg(q[i], q[i]); - } _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf[i], &_l[i], _k[i], 2); + bn_rec_naf(naf[i], &_l[i], _k[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); + if (i == 0) { + ep8_norm(q, p); + if (bn_sign(_k[0]) == RLC_NEG) { + ep8_neg(q, q); + } + ep8_tab(t[0], q, RLC_WIDTH); + } else { + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_frb(t[i][j], t[i - 1][j], 1); + if (bn_sign(_k[i]) != bn_sign(_k[i - 1])) { + ep8_neg(t[i][j], t[i][j]); + } + } + } } ep8_set_infty(r); for (int j = l - 1; j >= 0; j--) { ep8_dbl(r, r); - for (int i = 0; i < 16; i++) { + for (size_t i = 0; i < 16; i++) { if (naf[i][j] > 0) { - ep8_add(r, r, q[i]); + ep8_add(r, r, t[i][naf[i][j] / 2]); } if (naf[i][j] < 0) { - ep8_sub(r, r, q[i]); + ep8_sub(r, r, t[i][-naf[i][j] / 2]); } } } @@ -102,9 +112,12 @@ static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - for (int i = 0; i < 16; i++) { + ep8_free(q); + for (size_t i = 0; i < 16; i++) { bn_free(_k[i]); - ep8_free(q[i]); + for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_free(t[i][j]); + } } } } @@ -114,25 +127,25 @@ static void ep8_mul_glv_imp(ep8_t r, const ep8_t p, const bn_t k) { #if EP_MUL == LWREG || !defined(STRIP) static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { - int8_t reg[16][RLC_FP_BITS + 1], b[16], s[16], c0, n0; - ep8_t q, w, t[16][1 << (RLC_WIDTH - 2)]; + size_t l, c = 4, m = 16; bn_t n, _k[16], u; - size_t l, len, _l[16]; + int8_t even[4], col, sac[4][4 * (RLC_FP_BITS + 1)]; + ep8_t q[16], t[4][1 << 3]; bn_null(n); bn_null(u); - ep8_null(q); - ep8_null(w); RLC_TRY { bn_new(n); bn_new(u); - ep8_new(q); - ep8_new(w); - for (size_t i = 0; i < 16; i++) { + for (int i = 0; i < 16; i++) { bn_null(_k[i]); + ep8_null(q[i]); bn_new(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_new(q[i]); + } + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (1 << 3); j++) { ep8_null(t[i][j]); ep8_new(t[i][j]); } @@ -143,70 +156,88 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { bn_mod(_k[0], k, n); bn_rec_frb(_k, 16, _k[0], u, n, ep_curve_is_pairf() == EP_BN); - l = 0; - /* Make some extra room for BN curves that grow subscalars by 1. */ - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); - ep8_norm(t[0][0], p); + ep8_norm(q[0], p); + for (size_t i = 1; i < 16; i++) { + ep8_frb(q[i], q[i - 1], 1); + } for (size_t i = 0; i < 16; i++) { - s[i] = bn_sign(_k[i]); + ep8_neg(r, q[i]); + fp8_copy_sec(q[i]->y, r->y, bn_sign(_k[i]) == RLC_NEG); bn_abs(_k[i], _k[i]); - b[i] = bn_is_even(_k[i]); - _k[i]->dp[0] |= b[i]; - - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg[i], &_l[i], _k[i], len, RLC_WIDTH); - l = RLC_MAX(l, _l[i]); - - /* Apply Frobenius before flipping sign to build table. */ - if (i > 0) { - ep8_frb(t[i][0], t[i - 1][0], 1); - } } - - for (size_t i = 0; i < 16; i++) { - ep8_neg(q, t[i][0]); - fp8_copy_sec(q->y, t[i][0]->y, s[i] == RLC_POS); - ep8_tab(t[i], q, RLC_WIDTH); + for (size_t i = 0; i < c; i++) { + even[i] = bn_is_even(_k[i * m / c]); + bn_add_dig(_k[i * m / c], _k[i * m / c], even[i]); + } + + for (size_t i = 0; i < c; i++) { + ep8_copy(t[i][0], q[i * m / c]); + for (size_t j = 1; j < (1 << 3); j++) { + l = util_bits_dig(j); + ep8_add(t[i][j], t[i][j ^ (1 << (l - 1))], q[l + i * m / c]); + } + l = RLC_FP_BITS + 1; + bn_rec_sac(sac[i], &l, _k + i * m / c, m / c, bn_bits(n)); } #if defined(EP_MIXED) - fp8_set_dig(w->z, 1); - w->coord = BASIC; + for (size_t i = 0; i < c; i++) { + ep8_norm_sim(t[i] + 1, t[i] + 1, (1 << 3) - 1); + } + fp8_set_dig(r->z, 1); + fp8_set_dig(q[1]->z, 1); + r->coord = q[1]->coord = BASIC; #else - w->coord = = EP_ADD; + r->coord = q[1]->coord = EP_ADD; #endif ep8_set_infty(r); - for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - ep8_dbl(r, r); + for (size_t i = 0; i < c; i++) { + col = 0; + for (int j = 3; j > 0; j--) { + col <<= 1; + col += sac[i][j * l + l - 1]; } + for (size_t m = 0; m < (1 << 3); m++) { + fp8_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp8_copy_sec(q[1]->y, t[i][m]->y, m == col); +#if !defined(EP_MIXED) + fp8_copy_sec(q[1]->z, t[i][m]->z, m == col); +#endif + } + ep8_neg(q[2], q[1]); + fp8_copy_sec(q[1]->y, q[2]->y, sac[i][l - 1]); + ep8_add(r, r, q[1]); + } - for (size_t i = 0; i < 16; i++) { - n0 = reg[i][j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < (1 << (RLC_WIDTH - 2)); m++) { - fp8_copy_sec(w->x, t[i][m]->x, m == n0); - fp8_copy_sec(w->y, t[i][m]->y, m == n0); - #if !defined(EP_MIXED) - fp8_copy_sec(w->z, t[i][m]->z, m == n0); - #endif - } + for (int j = l - 2; j >= 0; j--) { + ep8_dbl(r, r); - ep8_neg(q, w); - fp8_copy_sec(q->y, w->y, c0 == 0); - ep8_add(r, r, q); + for (size_t i = 0; i < c; i++) { + col = 0; + for (int k = 3; k > 0; k--) { + col <<= 1; + col += sac[i][k * l + j]; + } + + for (size_t m = 0; m < (1 << 3); m++) { + fp8_copy_sec(q[1]->x, t[i][m]->x, m == col); + fp8_copy_sec(q[1]->y, t[i][m]->y, m == col); +#if !defined(EP_MIXED) + fp8_copy_sec(q[1]->z, t[i][m]->z, m == col); +#endif + } + ep8_neg(q[2], q[1]); + fp8_copy_sec(q[1]->y, q[2]->y, sac[i][j]); + ep8_add(r, r, q[1]); } } - for (size_t i = 0; i < 16; i++) { - /* Tables are built with points already negated, so no need here. */ - ep8_sub(q, r, t[i][0]); - fp8_copy_sec(r->x, q->x, b[i]); - fp8_copy_sec(r->y, q->y, b[i]); - fp8_copy_sec(r->z, q->z, b[i]); + for (size_t i = 0; i < c; i++) { + ep8_sub(q[1], r, q[i * m / c]); + fp8_copy_sec(r->x, q[1]->x, even[i]); + fp8_copy_sec(r->y, q[1]->y, even[i]); + fp8_copy_sec(r->z, q[1]->z, even[i]); } /* Convert r to affine coordinates. */ @@ -218,11 +249,12 @@ static void ep8_mul_reg_gls(ep8_t r, const ep8_t p, const bn_t k) { RLC_FINALLY { bn_free(n); bn_free(u); - ep8_free(q); - ep8_free(w); for (int i = 0; i < 16; i++) { bn_free(_k[i]); - for (size_t j = 0; j < (1 << (RLC_WIDTH - 2)); j++) { + ep8_free(q[i]); + } + for (size_t i = 0; i < c; i++) { + for (int j = 0; j < (1 << 3); j++) { ep8_free(t[i][j]); } } @@ -595,7 +627,7 @@ void ep8_mul_lwnaf(ep8_t r, const ep8_t p, const bn_t k) { #if defined(EP_ENDOM) if (ep_curve_is_endom()) { - ep8_mul_glv_imp(r, p, k); + ep8_mul_gls_imp(r, p, k); return; } #endif diff --git a/src/low/x64-asm-8l/relic_bn_mul_low.c b/src/low/x64-asm-8l/relic_bn_mul_low.c index bacd3d645..2c8c26e29 100644 --- a/src/low/x64-asm-8l/relic_bn_mul_low.c +++ b/src/low/x64-asm-8l/relic_bn_mul_low.c @@ -53,8 +53,8 @@ void bn_muln_low(dig_t *c, const dig_t *a, const dig_t *b, size_t size) { } void bn_muld_low(dig_t *c, const dig_t *a, size_t sa, const dig_t *b, size_t sb, - int low, int high) { + uint_t low, uint_t high) { (void)low; (void)high; - mpn_mul(c, a, sizea, b, sizeb); + mpn_mul(c, a, sa, b, sb); } diff --git a/src/pc/relic_pc_exp.c b/src/pc/relic_pc_exp.c index 05d854385..daa5d517d 100644 --- a/src/pc/relic_pc_exp.c +++ b/src/pc/relic_pc_exp.c @@ -114,21 +114,20 @@ static void gt_psi(gt_t c, const gt_t a) { * @param[in] b - the exponent. * @param[in] f - the maximum Frobenius power. */ -void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { - int8_t c0, n0, *reg = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); - int8_t *e = RLC_ALLOCA(int8_t, f), *s = RLC_ALLOCA(int8_t, f); - gt_t q, w, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); +void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { + int8_t *naf = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); + int8_t n0, *s = RLC_ALLOCA(int8_t, f); + gt_t q, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); bn_t n, u, *_b = RLC_ALLOCA(bn_t, f); - size_t l, len, *_l = RLC_ALLOCA(size_t, f); + size_t l, *_l = RLC_ALLOCA(size_t, f); - if (reg == NULL || e == NULL || t == NULL || _b == NULL || _l == NULL) { + if (naf == NULL || t == NULL || _b == NULL || _l == NULL) { RLC_THROW(ERR_NO_MEMORY); return; } if (bn_is_zero(b)) { - RLC_FREE(reg); - RLC_FREE(e); + RLC_FREE(naf); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -139,13 +138,11 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_null(n); bn_null(u); gt_null(q); - gt_null(w); RLC_TRY { bn_new(n); bn_new(u); gt_new(q); - gt_new(w); for (size_t i = 0; i < f; i++) { bn_null(_b[i]); bn_new(_b[i]); @@ -171,65 +168,71 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); l = 0; - len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); - gt_copy(t[0], a); for (size_t i = 0; i < f; i++) { s[i] = bn_sign(_b[i]); - bn_abs(_b[i], _b[i]); - e[i] = bn_is_even(_b[i]); - _b[i]->dp[0] |= e[i]; - _l[i] = RLC_FP_BITS + 1; - bn_rec_reg(reg + i * (RLC_FP_BITS + 1), &_l[i], _b[i], len, RLC_WIDTH); + bn_rec_naf(naf + i * (RLC_FP_BITS + 1), &_l[i], _b[i], RLC_WIDTH); l = RLC_MAX(l, _l[i]); - /* Apply Frobenius before flipping sign to build table. */ - if (i > 0) { - gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); - } } - for (size_t i = 0; i < f; i++) { - gt_inv(q, t[i * RLC_GT_TABLE]); - gt_copy_sec(q, t[i * RLC_GT_TABLE], s[i] == RLC_POS); + if (ep_curve_is_pairf() == EP_K16 || ep_curve_embed() == 18) { + gt_copy(t[0], a); + for (size_t i = 1; i < f; i++) { + gt_psi(t[i * RLC_GT_TABLE], t[(i - 1) * RLC_GT_TABLE]); + } + for (size_t i = 0; i < f; i++) { + gt_copy(q, t[i * RLC_GT_TABLE]); + if (s[i] == RLC_NEG) { + gt_inv(q, t[i * RLC_GT_TABLE]); + } + if (RLC_WIDTH > 2) { + gt_sqr(t[i * RLC_GT_TABLE], q); + gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); + for (size_t j = 2; j < RLC_GT_TABLE; j++) { + gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], + t[i * (RLC_GT_TABLE)]); + } + } + gt_copy(t[i * RLC_GT_TABLE], q); + } + } else { + gt_copy(q, a); + if (bn_sign(_b[0]) == RLC_NEG) { + gt_inv(q, q); + } if (RLC_WIDTH > 2) { - gt_sqr(t[i * RLC_GT_TABLE], q); - gt_mul(t[i * RLC_GT_TABLE + 1], t[i * RLC_GT_TABLE], q); + gt_sqr(t[0], q); + gt_mul(t[1], t[0], q); for (size_t j = 2; j < RLC_GT_TABLE; j++) { - gt_mul(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j - 1], - t[i * (RLC_GT_TABLE)]); + gt_mul(t[j], t[j - 1], t[0]); + } + } + gt_copy(t[0], q); + for (size_t i = 1; i < f; i++) { + for (size_t j = 0; j < RLC_GT_TABLE; j++) { + gt_psi(t[i * RLC_GT_TABLE + j], t[(i - 1) * RLC_GT_TABLE + j]); + if (s[i] != s[i - 1]) { + gt_inv(t[i * RLC_GT_TABLE + j], t[i * RLC_GT_TABLE + j]); + } } } - gt_copy(t[i * RLC_GT_TABLE], q); } gt_set_unity(c); for (int j = l - 1; j >= 0; j--) { - for (size_t i = 0; i < RLC_WIDTH - 1; i++) { - gt_sqr(c, c); - } + gt_sqr(c, c); for (size_t i = 0; i < f; i++) { - n0 = reg[i * (RLC_FP_BITS + 1) + j]; - c0 = (n0 >> 7); - n0 = ((n0 ^ c0) - c0) >> 1; - - for (size_t m = 0; m < RLC_GT_TABLE; m++) { - gt_copy_sec(w, t[i * RLC_GT_TABLE + m], m == n0); + n0 = naf[i * (RLC_FP_BITS + 1) + j]; + if (n0 > 0) { + gt_mul(c, c, t[i * RLC_GT_TABLE + n0 / 2]); + } + if (n0 < 0) { + gt_inv(q, t[i * RLC_GT_TABLE - n0 / 2]); + gt_mul(c, c, q); } - - gt_inv(q, w); - gt_copy_sec(q, w, c0 == 0); - gt_mul(c, c, q); - } } - - for (size_t i = 0; i < f; i++) { - /* Tables are built with points already negated, so no need here. */ - gt_inv(q, t[i * RLC_GT_TABLE]); - gt_mul(q, c, q); - gt_copy_sec(c, q, e[i]); - } } RLC_CATCH_ANY { RLC_THROW(ERR_CAUGHT); @@ -238,15 +241,13 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_free(n); bn_free(u); gt_free(q); - gt_free(w); for (size_t i = 0; i < f; i++) { bn_free(_b[i]); for (size_t j = 0; j < RLC_GT_TABLE; j++) { gt_free(t[i * RLC_GT_TABLE + j]); } } - RLC_FREE(reg); - RLC_FREE(e); + RLC_FREE(naf); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -254,10 +255,132 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { } } +#if FP_PRIME < 1536 + /** - * Size of a precomputation table using the double-table comb method. + * Exponentiates an element from G_T in constant time. + * + * @param[out] c - the result. + * @param[in] a - the element to exponentiate. + * @param[in] b - the exponent. + * @param[in] f - the maximum Frobenius power. */ -#define RLC_GT_TABLE (1 << (RLC_WIDTH - 2)) +static void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t d, + size_t f) { + size_t l, s = (1 << (f / d - 1)); + bn_t n, *_b = RLC_ALLOCA(bn_t, f), u; + int8_t col, *e = RLC_ALLOCA(int8_t, d); + int8_t *sac = RLC_ALLOCA(int8_t, d * f * RLC_FP_BITS); + gt_t *q = RLC_ALLOCA(gt_t, f), *t = RLC_ALLOCA(gt_t, d * s); + + if (sac == NULL || e == NULL || t == NULL || _b == NULL || q == NULL) { + RLC_THROW(ERR_NO_MEMORY); + return; + } + + bn_null(n); + bn_null(u); + + RLC_TRY { + bn_new(n); + bn_new(u); + for (int i = 0; i < f; i++) { + bn_null(_b[i]); + gt_null(q[i]); + bn_new(_b[i]); + gt_new(q[i]); + } + for (size_t i = 0; i < d; i++) { + for (int j = 0; j < s; j++) { + gt_null(t[i * s + j]); + gt_new(t[i * s + j]); + } + } + + gt_get_ord(n); + fp_prime_get_par(u); + if (ep_curve_is_pairf() == EP_SG18) { + /* Compute base -3*u for the recoding below. */ + bn_dbl(n, u); + bn_add(u, u, n); + bn_neg(u, u); + } + bn_mod(_b[0], b, n); + bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); + + gt_copy(q[0], a); + for (size_t i = 1; i < f; i++) { + gt_psi(q[i], q[i - 1]); + } + for (size_t i = 0; i < f; i++) { + gt_inv(c, q[i]); + gt_copy_sec(q[i], c, bn_sign(_b[i]) == RLC_NEG); + bn_abs(_b[i], _b[i]); + } + for (size_t i = 0; i < d; i++) { + e[i] = bn_is_even(_b[i * f / d]); + bn_add_dig(_b[i * f / d], _b[i * f / d], e[i]); + } + + for (size_t i = 0; i < d; i++) { + gt_copy(t[i * s], q[i * f / d]); + for (size_t j = 1; j < s; j++) { + l = util_bits_dig(j); + gt_mul(t[i * s + j], t[i * s + (j ^ (1 << (l - 1)))], q[l + i * f / d]); + } + l = RLC_FP_BITS; + bn_rec_sac(sac + i * f * RLC_FP_BITS, &l, _b + i * f / d, f / d, bn_bits(n)); + } + + gt_set_unity(c); + for (int j = l - 1; j >= 0; j--) { + gt_sqr(c, c); + for (size_t i = 0; i < d; i++) { + col = 0; + for (int k = f / d - 1; k > 0; k--) { + col <<= 1; + col += sac[i * f * RLC_FP_BITS + k * l + j]; + } + + for (size_t m = 0; m < s; m++) { + gt_copy_sec(q[1], t[i * s + m], m == col); + } + gt_inv(q[2], q[1]); + gt_copy_sec(q[1], q[2], sac[i * f * RLC_FP_BITS + j]); + gt_mul(c, c, q[1]); + } + } + + for (size_t i = 0; i < d; i++) { + gt_inv(q[1], q[i * f / d]); + gt_mul(q[1], q[1], c); + gt_copy_sec(c, q[1], e[i]); + } + } + RLC_CATCH_ANY { + RLC_THROW(ERR_CAUGHT); + } + RLC_FINALLY { + bn_free(n); + bn_free(u); + for (int i = 0; i < f; i++) { + bn_free(_b[i]); + gt_free(q[i]); + } + for (size_t i = 0; i < d; i++) { + for (int j = 0; j < s; j++) { + gt_free(t[i * d + j]); + } + } + RLC_FREE(e); + RLC_FREE(_b); + RLC_FREE(q); + RLC_FREE(t); + RLC_FREE(sac); + } +} + +#else /** * Exponentiates an element from G_T in constant time. @@ -267,20 +390,21 @@ void gt_exp_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { * @param[in] b - the exponent. * @param[in] f - the maximum Frobenius power. */ -void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { - int8_t *naf = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); - int8_t n0, *s = RLC_ALLOCA(int8_t, f); - gt_t q, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); +void gt_exp_reg_gls(gt_t c, const gt_t a, const bn_t b, size_t f) { + int8_t c0, n0, *reg = RLC_ALLOCA(int8_t, f * (RLC_FP_BITS + 1)); + int8_t *e = RLC_ALLOCA(int8_t, f), *s = RLC_ALLOCA(int8_t, f); + gt_t q, w, *t = RLC_ALLOCA(gt_t, f * RLC_GT_TABLE); bn_t n, u, *_b = RLC_ALLOCA(bn_t, f); - size_t l, *_l = RLC_ALLOCA(size_t, f); + size_t l, len, *_l = RLC_ALLOCA(size_t, f); - if (naf == NULL || t == NULL || _b == NULL || _l == NULL) { + if (reg == NULL || e == NULL || t == NULL || _b == NULL || _l == NULL) { RLC_THROW(ERR_NO_MEMORY); return; } if (bn_is_zero(b)) { - RLC_FREE(naf); + RLC_FREE(reg); + RLC_FREE(e); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -291,11 +415,13 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_null(n); bn_null(u); gt_null(q); + gt_null(w); RLC_TRY { bn_new(n); bn_new(u); gt_new(q); + gt_new(w); for (size_t i = 0; i < f; i++) { bn_null(_b[i]); bn_new(_b[i]); @@ -321,13 +447,16 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_rec_frb(_b, f, _b[0], u, n, ep_curve_is_pairf() == EP_BN); l = 0; + len = bn_bits(u) + (ep_curve_is_pairf() == EP_BN); gt_copy(t[0], a); for (size_t i = 0; i < f; i++) { s[i] = bn_sign(_b[i]); bn_abs(_b[i], _b[i]); + e[i] = bn_is_even(_b[i]); + _b[i]->dp[0] |= e[i]; _l[i] = RLC_FP_BITS + 1; - bn_rec_naf(naf + i * (RLC_FP_BITS + 1), &_l[i], _b[i], RLC_WIDTH); + bn_rec_reg(reg + i * (RLC_FP_BITS + 1), &_l[i], _b[i], len, RLC_WIDTH); l = RLC_MAX(l, _l[i]); /* Apply Frobenius before flipping sign to build table. */ if (i > 0) { @@ -351,19 +480,32 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { gt_set_unity(c); for (int j = l - 1; j >= 0; j--) { - gt_sqr(c, c); + for (size_t i = 0; i < RLC_WIDTH - 1; i++) { + gt_sqr(c, c); + } for (size_t i = 0; i < f; i++) { - n0 = naf[i * (RLC_FP_BITS + 1) + j]; - if (n0 > 0) { - gt_mul(c, c, t[i * RLC_GT_TABLE + n0 / 2]); - } - if (n0 < 0) { - gt_inv(q, t[i * RLC_GT_TABLE - n0 / 2]); - gt_mul(c, c, q); + n0 = reg[i * (RLC_FP_BITS + 1) + j]; + c0 = (n0 >> 7); + n0 = ((n0 ^ c0) - c0) >> 1; + + for (size_t m = 0; m < RLC_GT_TABLE; m++) { + gt_copy_sec(w, t[i * RLC_GT_TABLE + m], m == n0); } + + gt_inv(q, w); + gt_copy_sec(q, w, c0 == 0); + gt_mul(c, c, q); + } } + + for (size_t i = 0; i < f; i++) { + /* Tables are built with points already negated, so no need here. */ + gt_inv(q, t[i * RLC_GT_TABLE]); + gt_mul(q, c, q); + gt_copy_sec(c, q, e[i]); + } } RLC_CATCH_ANY { RLC_THROW(ERR_CAUGHT); @@ -372,13 +514,15 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { bn_free(n); bn_free(u); gt_free(q); + gt_free(w); for (size_t i = 0; i < f; i++) { bn_free(_b[i]); for (size_t j = 0; j < RLC_GT_TABLE; j++) { gt_free(t[i * RLC_GT_TABLE + j]); } } - RLC_FREE(naf); + RLC_FREE(reg); + RLC_FREE(e); RLC_FREE(s); RLC_FREE(t); RLC_FREE(_b); @@ -386,6 +530,8 @@ void gt_exp_gls_imp(gt_t c, const gt_t a, const bn_t b, size_t f) { } } +#endif + /*============================================================================*/ /* Public definitions */ /*============================================================================*/ @@ -521,8 +667,12 @@ void gt_exp_sec(gt_t c, const gt_t a, const bn_t b) { return; } -#if FP_PRIME <= 1536 - gt_exp_imp(c, a, b, ep_curve_frdim()); +#if FP_PRIME < 1536 + size_t d = ep_curve_frdim(); + d = (d > 4 ? d / 4 : 1); + gt_exp_reg_gls(c, a, b, d, ep_curve_frdim()); +#elif FP_PRIME == 1536 + gt_exp_reg_gls(c, a, b, 1); #else RLC_CAT(RLC_GT_LOWER, exp_monty)(c, a, b); #endif diff --git a/src/pp/relic_pp_map_k54.c b/src/pp/relic_pp_map_k54.c index 17b2b0181..5860fa94a 100644 --- a/src/pp/relic_pp_map_k54.c +++ b/src/pp/relic_pp_map_k54.c @@ -181,7 +181,6 @@ void pp_map_k54(fp54_t r, const ep_t p, const fp9_t qx, const fp9_t qy) { if (bn_sign(a) == RLC_NEG) { fp54_inv_cyc(r, r); } - fp18_print(r[0]); pp_exp_k54(r, r); break; } diff --git a/test/test_bn.c b/test/test_bn.c index 5ac90f83b..70d37a994 100644 --- a/test/test_bn.c +++ b/test/test_bn.c @@ -2292,7 +2292,7 @@ static int recoding(void) { bn_rand_mod(a, b); bn_rec_glv(b, c, a, b, (const bn_t *)v1, (const bn_t *)v2); ep_curve_get_ord(v2[0]); - bn_rec_sac(ptr, &l, v1, 2, v2[0]); + bn_rec_sac(ptr, &l, v1, 2, bn_bits(v2[0])); if (bn_is_even(b)) { bn_add_dig(b, b, 1); }