ecmult_impl: eliminate scratch memory used when generating context

bitcoin-core · Sep 22, 2018 · e1dacce · e1dacce
1 parent 912aa8c
commit e1dacce
Show file tree

Hide file tree

Showing 4 changed files with 100 additions and 21 deletions.
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
@@ -137,24 +137,94 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
     secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
 }
 
-static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage *pre, const secp256k1_gej *a, const secp256k1_callback *cb) {
-    secp256k1_gej *prej = (secp256k1_gej*)checked_malloc(cb, sizeof(secp256k1_gej) * n);
-    secp256k1_ge *prea = (secp256k1_ge*)checked_malloc(cb, sizeof(secp256k1_ge) * n);
-    secp256k1_fe *zr = (secp256k1_fe*)checked_malloc(cb, sizeof(secp256k1_fe) * n);
+static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
+    secp256k1_gej d;
+    secp256k1_ge a_ge, d_ge, p_ge;
+    secp256k1_ge last_ge;
+    secp256k1_gej pj;
+    secp256k1_fe zi;
+    secp256k1_fe zr;
+    secp256k1_fe dx_over_dz_squared;
     int i;
 
-    /* Compute the odd multiples in Jacobian form. */
-    secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
-    /* Convert them in batch to affine coordinates. */
-    secp256k1_ge_set_table_gej_var(prea, prej, zr, n);
-    /* Convert them to compact storage form. */
-    for (i = 0; i < n; i++) {
-        secp256k1_ge_to_storage(&pre[i], &prea[i]);
+    VERIFY_CHECK(!a->infinity);
+
+    secp256k1_gej_double_var(&d, a, NULL);
+
+    /* First, we perform all the additions in an isomorphic curve obtained by multiplying
+     * all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use
+     * `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store
+     * the resulting y-coordinate and the z-ratio, since we only have enough memory to
+     * store two field elements per table entry. These are sufficient to undo the isomorphism
+     * and recompute all the `x`s using only two field inversions (of `pj.z` and of `d.z`).
+     */
+    d_ge.x = d.x;
+    d_ge.y = d.y;
+    d_ge.infinity = 0;
+
+    secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
+    pj.x = a_ge.x;
+    pj.y = a_ge.y;
+    pj.z = a->z;
+    pj.infinity = 0;
+
+    zr = d.z;
+    secp256k1_fe_normalize(&zr);
+    secp256k1_fe_to_storage(&pre[0].x, &zr);
+    secp256k1_fe_normalize(&pj.y);
+    secp256k1_fe_to_storage(&pre[0].y, &pj.y);
+
+    for (i = 1; i < n; i++) {
+        secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr);
+        secp256k1_fe_normalize(&zr);
+        secp256k1_fe_to_storage(&pre[i].x, &zr);
+        secp256k1_fe_normalize(&pj.y);
+        secp256k1_fe_to_storage(&pre[i].y, &pj.y);
     }
 
-    free(prea);
-    free(prej);
-    free(zr);
+    /* Map `pj` back to our curve by multiplying its z-coordinate by `d.z`. */
+    secp256k1_fe_mul(&pj.z, &pj.z, &d.z);
+    /* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so
+     * that we can combine it with the saved z-ratios to compute the other zs
+     * without any more inversions. */
+    secp256k1_fe_inv_var(&zi, &pj.z);
+    secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi);
+    secp256k1_ge_from_storage(&last_ge, &pre[n - 1]);
+    secp256k1_ge_to_storage(&pre[n - 1], &p_ge);
+
+    /* Compute the actual x-coordinate of D, which will be needed below. */
+    secp256k1_fe_inv_var(&d.z, &d.z);
+    secp256k1_fe_sqr(&dx_over_dz_squared, &d.z);
+    secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x);
+
+    i = n - 1;
+    while (i > 0) {
+        secp256k1_fe zi2, zi3;
+        i--;
+        /* For the remaining points, we extract the z-ratio from the stored
+         * x-coordinate, compute its z^-1 from that, and compute the full
+         * point from that: */
+        secp256k1_fe_mul(&zi, &zi, &last_ge.x);
+        secp256k1_fe_sqr(&zi2, &zi);
+        secp256k1_fe_mul(&zi3, &zi2, &zi);
+        /* To compute x, we observe that the stored z-ratio `rzr` (held in
+         * `last_ge.x`) is simply `h` from `gej_add_ge_var`, which equals
+         * `d_x * z^2 - x`, where `d_x` is the affine x coordinate of `D`
+         * (`dx_over_dz_squared`) and `x`, `z` are the Jacobian coordinates of
+         * our desired point. Solving for `x` and dividing by `z^2`, we get
+         *
+         *     x_affine = d_x - rzr / z^2
+         *              = d_x - rzr * zi2
+         */
+        secp256k1_fe_mul(&p_ge.x, &last_ge.x, &zi2);
+        secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);
+        secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared);
+        /* y is stored_y/z^3, as we expect */
+        secp256k1_ge_from_storage(&last_ge, &pre[i]);
+        secp256k1_fe_mul(&p_ge.y, &last_ge.y, &zi3);
+        /* Store */
+        secp256k1_ge_to_storage(&pre[i], &p_ge);
+    }
 }
 
 /** The following two macro retrieves a particular odd multiple from a table
@@ -202,7 +272,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
     ctx->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
 
     /* precompute the tables with odd multiples */
-    secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb);
+    secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
 
 #ifdef USE_ENDOMORPHISM
     {
@@ -216,7 +286,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
         for (i = 0; i < 128; i++) {
             secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
         }
-        secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb);
+        secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
     }
 #endif
 }

diff --git a/src/group.h b/src/group.h
@@ -70,7 +70,7 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a
 /** Set a batch of group elements equal to the inputs given in jacobian
  *  coordinates (with known z-ratios). zr must contain the known z-ratios such
  *  that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */
-static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len);
+static void secp256k1_ge_set_table_gej_var(secp256k1_ge_storage *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len);
 
 /** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to
  *  the same global z "denominator". zr must contain the known z-ratios such

diff --git a/src/group_impl.h b/src/group_impl.h
@@ -172,20 +172,23 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a
     }
 }
 
-static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) {
+static void secp256k1_ge_set_table_gej_var(secp256k1_ge_storage *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) {
     size_t i = len - 1;
     secp256k1_fe zi;
 
     if (len > 0) {
+        secp256k1_ge rge;
         /* Compute the inverse of the last z coordinate, and use it to compute the last affine output. */
         secp256k1_fe_inv(&zi, &a[i].z);
-        secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
+        secp256k1_ge_set_gej_zinv(&rge, &a[i], &zi);
+        secp256k1_ge_to_storage(&r[i], &rge);
 
         /* Work out way backwards, using the z-ratios to scale the x/y values. */
         while (i > 0) {
             secp256k1_fe_mul(&zi, &zi, &zr[i]);
             i--;
-            secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
+            secp256k1_ge_set_gej_zinv(&rge, &a[i], &zi);
+            secp256k1_ge_to_storage(&r[i], &rge);
         }
     }
 }

diff --git a/src/tests.c b/src/tests.c
@@ -2096,14 +2096,19 @@ void test_ge(void) {
     {
         secp256k1_fe *zr = (secp256k1_fe *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_fe));
         secp256k1_ge *ge_set_table = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
+        secp256k1_ge_storage *ge_set_table_storage = (secp256k1_ge_storage *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge_storage));
         secp256k1_ge *ge_set_all = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
         for (i = 0; i < 4 * runs + 1; i++) {
             /* Compute gej[i + 1].z / gez[i].z (with gej[n].z taken to be 1). */
             if (i < 4 * runs) {
                 secp256k1_fe_mul(&zr[i + 1], &zinv[i], &gej[i + 1].z);
             }
         }
-        secp256k1_ge_set_table_gej_var(ge_set_table, gej, zr, 4 * runs + 1);
+        secp256k1_ge_set_table_gej_var(&ge_set_table_storage[1], &gej[1], &zr[1], 4 * runs);
+        secp256k1_ge_set_infinity(&ge_set_table[0]);
+        for (i = 1; i < 4 * runs + 1; i++) {
+            secp256k1_ge_from_storage(&ge_set_table[i], &ge_set_table_storage[i]);
+        }
         secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1);
         for (i = 0; i < 4 * runs + 1; i++) {
             secp256k1_fe s;
@@ -2112,6 +2117,7 @@ void test_ge(void) {
             ge_equals_gej(&ge_set_table[i], &gej[i]);
             ge_equals_gej(&ge_set_all[i], &gej[i]);
         }
+        free(ge_set_table_storage);
         free(ge_set_table);
         free(ge_set_all);
         free(zr);