Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate scratch memory used when generating contexts #557

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/bench_ecmult.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ int main(int argc, char **argv) {
secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
}
}
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS, &data.ctx->error_callback);
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS);
free(pubkeys_gej);

for (i = 1; i <= 8; ++i) {
Expand Down
2 changes: 1 addition & 1 deletion src/ecmult_gen_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
}
}
secp256k1_ge_set_all_gej_var(prec, precj, 1024, cb);
secp256k1_ge_set_all_gej_var(prec, precj, 1024);
}
for (j = 0; j < 64; j++) {
for (i = 0; i < 16; i++) {
Expand Down
143 changes: 127 additions & 16 deletions src/ecmult_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,24 +137,135 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
}

static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage *pre, const secp256k1_gej *a, const secp256k1_callback *cb) {
secp256k1_gej *prej = (secp256k1_gej*)checked_malloc(cb, sizeof(secp256k1_gej) * n);
secp256k1_ge *prea = (secp256k1_ge*)checked_malloc(cb, sizeof(secp256k1_ge) * n);
secp256k1_fe *zr = (secp256k1_fe*)checked_malloc(cb, sizeof(secp256k1_fe) * n);
static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
secp256k1_gej d;
secp256k1_ge d_ge, p_ge;
secp256k1_gej pj;
secp256k1_fe zi;
secp256k1_fe zr;
secp256k1_fe dx_over_dz_squared;
int i;

/* Compute the odd multiples in Jacobian form. */
secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
/* Convert them in batch to affine coordinates. */
secp256k1_ge_set_table_gej_var(prea, prej, zr, n);
/* Convert them to compact storage form. */
for (i = 0; i < n; i++) {
secp256k1_ge_to_storage(&pre[i], &prea[i]);
VERIFY_CHECK(!a->infinity);

secp256k1_gej_double_var(&d, a, NULL);

/* First, we perform all the additions in an isomorphic curve obtained by multiplying
* all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use
* `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store
* the resulting y-coordinate and the z-ratio, since we only have enough memory to
* store two field elements. These are sufficient to efficiently undo the isomorphism
* and recompute all the `x`s.
*/
d_ge.x = d.x;
d_ge.y = d.y;
d_ge.infinity = 0;

secp256k1_ge_set_gej_zinv(&p_ge, a, &d.z);
pj.x = p_ge.x;
pj.y = p_ge.y;
pj.z = a->z;
pj.infinity = 0;

for (i = 0; i < (n - 1); i++) {
secp256k1_fe_normalize_var(&pj.y);
secp256k1_fe_to_storage(&pre[i].y, &pj.y);
secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr);
secp256k1_fe_normalize_var(&zr);
secp256k1_fe_to_storage(&pre[i].x, &zr);
}

free(prea);
free(prej);
free(zr);
/* Invert d.z in the same batch, preserving pj.z so we can extract 1/d.z */
secp256k1_fe_mul(&zi, &pj.z, &d.z);
secp256k1_fe_inv_var(&zi, &zi);

/* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so
* that we can combine it with the saved z-ratios to compute the other zs
* without any more inversions. */
secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi);
secp256k1_ge_to_storage(&pre[n - 1], &p_ge);

/* Compute the actual x-coordinate of D, which will be needed below. */
secp256k1_fe_mul(&d.z, &zi, &pj.z); /* d.z = 1/d.z */
secp256k1_fe_sqr(&dx_over_dz_squared, &d.z);
secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x);

/* Going into the second loop, we have set `pre[n-1]` to its final affine
* form, but still need to set `pre[i]` for `i` in 0 through `n-2`. We
* have `zi = (p.z * d.z)^-1`, where
*
* `p.z` is the z-coordinate of the point on the isomorphic curve
* which was ultimately assigned to `pre[n-1]`.
* `d.z` is the multiplier that must be applied to all z-coordinates
* to move from our isomorphic curve back to secp256k1; so the
* product `p.z * d.z` is the z-coordinate of the secp256k1
* point assigned to `pre[n-1]`.
*
* All subsequent inverse-z-coordinates can be obtained by multiplying this
* factor by successive z-ratios, which is much more efficient than directly
* computing each one.
*
* Importantly, these inverse-zs will be coordinates of points on secp256k1,
* while our other stored values come from computations on the isomorphic
* curve. So in the below loop, we will take care not to actually use `zi`
* or any derived values until we're back on secp256k1.
*/
i = n - 1;
while (i > 0) {
secp256k1_fe zi2, zi3;
const secp256k1_fe *rzr;
i--;

secp256k1_ge_from_storage(&p_ge, &pre[i]);

/* For each remaining point, we extract the z-ratio from the stored
* x-coordinate, compute its z^-1 from that, and compute the full
* point from that. */
rzr = &p_ge.x;
secp256k1_fe_mul(&zi, &zi, rzr);
secp256k1_fe_sqr(&zi2, &zi);
secp256k1_fe_mul(&zi3, &zi2, &zi);
/* To compute the actual x-coordinate, we use the stored z ratio and
* y-coordinate, which we obtained from `secp256k1_gej_add_ge_var`
* in the loop above, as well as the inverse of the square of its
* z-coordinate. We store the latter in the `zi2` variable, which is
* computed iteratively starting from the overall Z inverse then
* multiplying by each z-ratio in turn.
*
* Denoting the z-ratio as `rzr`, we observe that it is equal to `h`
* from the inside of the above `gej_add_ge_var` call. This satisfies
*
* rzr = d_x * z^2 - x * d_z^2
*
* where (`d_x`, `d_z`) are Jacobian coordinates of `D` and `(x, z)`
* are Jacobian coordinates of our desired point -- except both are on
* the isomorphic curve that we were using when we called `gej_add_ge_var`.
* To get back to secp256k1, we must multiply both `z`s by `d_z`, or
* equivalently divide both `x`s by `d_z^2`. Our equation then becomes
*
* rzr = d_x * z^2 / d_z^2 - x
*
* (The left-hand-side, being a ratio of z-coordinates, is unaffected
* by the isomorphism.)
*
* Rearranging to solve for `x`, we have
*
* x = d_x * z^2 / d_z^2 - rzr
*
* But what we actually want is the affine coordinate `X = x/z^2`,
* which will satisfy
*
* X = d_x / d_z^2 - rzr / z^2
* = dx_over_dz_squared - rzr * zi2
*/
secp256k1_fe_mul(&p_ge.x, rzr, &zi2);
secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);
secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared);
/* y is stored_y/z^3, as we expect */
secp256k1_fe_mul(&p_ge.y, &p_ge.y, &zi3);
/* Store */
secp256k1_ge_to_storage(&pre[i], &p_ge);
}
}

/** The following two macro retrieves a particular odd multiple from a table
Expand Down Expand Up @@ -202,7 +313,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
ctx->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));

/* precompute the tables with odd multiples */
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb);
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);

#ifdef USE_ENDOMORPHISM
{
Expand All @@ -216,7 +327,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
for (i = 0; i < 128; i++) {
secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
}
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb);
secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
}
#endif
}
Expand Down
7 changes: 1 addition & 6 deletions src/group.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,7 @@ static void secp256k1_ge_neg(secp256k1_ge *r, const secp256k1_ge *a);
static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a);

/** Set a batch of group elements equal to the inputs given in jacobian coordinates */
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb);

/** Set a batch of group elements equal to the inputs given in jacobian
* coordinates (with known z-ratios). zr must contain the known z-ratios such
* that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */
static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len);
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len);

/** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to
* the same global z "denominator". zr must contain the known z-ratios such
Expand Down
57 changes: 27 additions & 30 deletions src/group_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,46 +126,43 @@ static void secp256k1_ge_set_gej_var(secp256k1_ge *r, secp256k1_gej *a) {
r->y = a->y;
}

static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb) {
secp256k1_fe *az;
secp256k1_fe *azi;
static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len) {
secp256k1_fe u;
size_t i;
size_t count = 0;
az = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * len);
size_t last_i = SIZE_MAX;

for (i = 0; i < len; i++) {
if (!a[i].infinity) {
az[count++] = a[i].z;
/* Use destination's x coordinates as scratch space */
if (last_i == SIZE_MAX) {
r[i].x = a[i].z;
} else {
secp256k1_fe_mul(&r[i].x, &r[last_i].x, &a[i].z);
}
last_i = i;
}
}
if (last_i == SIZE_MAX) {
return;
}
secp256k1_fe_inv_var(&u, &r[last_i].x);

azi = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * count);
secp256k1_fe_inv_all_var(azi, az, count);
free(az);

count = 0;
for (i = 0; i < len; i++) {
r[i].infinity = a[i].infinity;
i = last_i;
while (i > 0) {
i--;
if (!a[i].infinity) {
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &azi[count++]);
secp256k1_fe_mul(&r[last_i].x, &r[i].x, &u);
secp256k1_fe_mul(&u, &u, &a[last_i].z);
last_i = i;
}
}
free(azi);
}
VERIFY_CHECK(!a[last_i].infinity);
r[last_i].x = u;

static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) {
size_t i = len - 1;
secp256k1_fe zi;

if (len > 0) {
/* Compute the inverse of the last z coordinate, and use it to compute the last affine output. */
secp256k1_fe_inv(&zi, &a[i].z);
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);

/* Work out way backwards, using the z-ratios to scale the x/y values. */
while (i > 0) {
secp256k1_fe_mul(&zi, &zi, &zr[i]);
i--;
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
for (i = 0; i < len; i++) {
r[i].infinity = a[i].infinity;
if (!a[i].infinity) {
secp256k1_ge_set_gej_zinv(&r[i], &a[i], &r[i].x);
}
}
}
Expand Down
22 changes: 17 additions & 5 deletions src/tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -2095,28 +2095,40 @@ void test_ge(void) {
/* Test batch gej -> ge conversion with and without known z ratios. */
{
secp256k1_fe *zr = (secp256k1_fe *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_fe));
secp256k1_ge *ge_set_table = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
secp256k1_ge *ge_set_all = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
for (i = 0; i < 4 * runs + 1; i++) {
/* Compute gej[i + 1].z / gez[i].z (with gej[n].z taken to be 1). */
if (i < 4 * runs) {
secp256k1_fe_mul(&zr[i + 1], &zinv[i], &gej[i + 1].z);
}
}
secp256k1_ge_set_table_gej_var(ge_set_table, gej, zr, 4 * runs + 1);
secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1, &ctx->error_callback);
secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1);
for (i = 0; i < 4 * runs + 1; i++) {
secp256k1_fe s;
random_fe_non_zero(&s);
secp256k1_gej_rescale(&gej[i], &s);
ge_equals_gej(&ge_set_table[i], &gej[i]);
ge_equals_gej(&ge_set_all[i], &gej[i]);
}
free(ge_set_table);
free(ge_set_all);
free(zr);
}

/* Test batch gej -> ge conversion with many infinities. */
for (i = 0; i < 4 * runs + 1; i++) {
random_group_element_test(&ge[i]);
/* randomly set half the points to infinitiy */
if(secp256k1_fe_is_odd(&ge[i].x)) {
secp256k1_ge_set_infinity(&ge[i]);
}
secp256k1_gej_set_ge(&gej[i], &ge[i]);
}
/* batch invert */
secp256k1_ge_set_all_gej_var(ge, gej, 4 * runs + 1);
/* check result */
for (i = 0; i < 4 * runs + 1; i++) {
ge_equals_gej(&ge[i], &gej[i]);
}

free(ge);
free(gej);
free(zinv);
Expand Down