-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: "safegcd" field and scalar inversion #767
Changes from 4 commits
9fbe485
4fab082
0b90a57
0c3869a
11b525c
3ae7179
2f643ad
b29e51e
3519dcc
bd18471
b8c7390
64a4912
e5f2d29
34bec40
bfd7a0f
f873c3b
17982d8
06d568a
16509ca
40c815e
dc58f4f
132c76d
2f6dfa2
90743d2
5de2c83
308fd32
ff0cf11
b51a1b5
1baff2c
65550c1
5ccfc30
cbd2d57
85da7a9
c9b7717
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -498,4 +498,380 @@ static SECP256K1_INLINE void secp256k1_fe_from_storage(secp256k1_fe *r, const se | |
#endif | ||
} | ||
|
||
static const secp256k1_fe SECP256K1_FE_TWO_POW_744 = SECP256K1_FE_CONST( | ||
0x0E90A100UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, | ||
0x00000000UL, 0x00000100UL, 0x000B7300UL, 0x1D214200UL | ||
); | ||
|
||
static void secp256k1_fe_mul_add(int64_t a0, int64_t a1, int64_t b0, int64_t b1, int64_t c0, int64_t c1, int64_t d0, int64_t d1, int64_t *t) { | ||
|
||
/* Each [a0,a1], etc. pair is a 126-bit signed value e.g. a0 + a1 * 2^64. | ||
* This method calculates ([a0,a1] * [c0,c1]) + ([b0,b1] * [d0,d1]), and | ||
* writes the 252-bit signed result to [t[0],t[1],t[2],t[3]]. | ||
*/ | ||
|
||
int64_t z0, z1, z2, z3; | ||
int128_t tt; | ||
|
||
tt = (int128_t)a0 * b0 | ||
+ (int128_t)c0 * d0; | ||
z0 = (int64_t)tt; tt -= z0; tt >>= 64; | ||
|
||
tt += (int128_t)a0 * b1 | ||
+ (int128_t)a1 * b0 | ||
+ (int128_t)c0 * d1 | ||
+ (int128_t)c1 * d0; | ||
z1 = (int64_t)tt; tt -= z1; tt >>= 64; | ||
|
||
tt += (int128_t)a1 * b1 | ||
+ (int128_t)c1 * d1; | ||
z2 = (int64_t)tt; tt -= z2; tt >>= 64; | ||
|
||
z3 = (int64_t)tt; | ||
|
||
t[0] = z0; t[1] = z1; t[2] = z2; t[3] = z3; | ||
} | ||
|
||
static void secp256k1_fe_combine_1s(int64_t *t) { | ||
|
||
int64_t a = t[0], b = t[1], c = t[2], d = t[3], | ||
e = t[4], f = t[5], g = t[6], h = t[7]; | ||
int128_t I, J, K, L; | ||
|
||
I = (int128_t)e * a + (int128_t)f * c; | ||
J = (int128_t)e * b + (int128_t)f * d; | ||
K = (int128_t)g * a + (int128_t)h * c; | ||
L = (int128_t)g * b + (int128_t)h * d; | ||
|
||
a = (int64_t)I; I -= a; I >>= 64; b = (int64_t)I; | ||
c = (int64_t)J; J -= c; J >>= 64; d = (int64_t)J; | ||
e = (int64_t)K; K -= e; K >>= 64; f = (int64_t)K; | ||
g = (int64_t)L; L -= g; L >>= 64; h = (int64_t)L; | ||
|
||
t[0] = a; t[1] = b; t[2] = c; t[3] = d; | ||
t[4] = e; t[5] = f; t[6] = g; t[7] = h; | ||
} | ||
|
||
static void secp256k1_fe_combine_2s(int64_t *t) { | ||
|
||
int64_t a0 = t[ 0], a1 = t[ 1]; | ||
int64_t b0 = t[ 2], b1 = t[ 3]; | ||
int64_t c0 = t[ 4], c1 = t[ 5]; | ||
int64_t d0 = t[ 6], d1 = t[ 7]; | ||
int64_t e0 = t[ 8], e1 = t[ 9]; | ||
int64_t f0 = t[10], f1 = t[11]; | ||
int64_t g0 = t[12], g1 = t[13]; | ||
int64_t h0 = t[14], h1 = t[15]; | ||
|
||
secp256k1_fe_mul_add(e0, e1, a0, a1, f0, f1, c0, c1, &t[0]); | ||
secp256k1_fe_mul_add(e0, e1, b0, b1, f0, f1, d0, d1, &t[4]); | ||
secp256k1_fe_mul_add(g0, g1, a0, a1, h0, h1, c0, c1, &t[8]); | ||
secp256k1_fe_mul_add(g0, g1, b0, b1, h0, h1, d0, d1, &t[12]); | ||
} | ||
|
||
static void secp256k1_fe_decode_matrix(secp256k1_fe *r, int64_t *t) { | ||
|
||
uint64_t u0, u1, u2, u3, u4; | ||
uint64_t r0, r1, r2, r3, r4; | ||
int128_t cc; | ||
|
||
cc = t[0]; | ||
u0 = (uint64_t)cc; cc >>= 64; | ||
cc += t[1]; | ||
u1 = (uint64_t)cc; cc >>= 64; | ||
cc += t[2]; | ||
u2 = (uint64_t)cc; cc >>= 64; | ||
cc += t[3]; | ||
u3 = (uint64_t)cc; cc >>= 64; | ||
u4 = (uint64_t)cc; | ||
|
||
VERIFY_CHECK(u4 == 0 || u4 == UINT64_MAX); | ||
|
||
/* Add twice the field prime in case u4 is non-zero (which represents -2^256). */ | ||
r0 = 0xFFFFEFFFFFC2FULL * 2; | ||
r1 = 0xFFFFFFFFFFFFFULL * 2; | ||
r2 = 0xFFFFFFFFFFFFFULL * 2; | ||
r3 = 0xFFFFFFFFFFFFFULL * 2; | ||
r4 = 0x0FFFFFFFFFFFFULL * 2; | ||
|
||
r0 += u0 & 0xFFFFFFFFFFFFFULL; | ||
r1 += u0 >> 52 | ((u1 << 12) & 0xFFFFFFFFFFFFFULL); | ||
r2 += u1 >> 40 | ((u2 << 24) & 0xFFFFFFFFFFFFFULL); | ||
r3 += u2 >> 28 | ((u3 << 36) & 0xFFFFFFFFFFFFFULL); | ||
r4 += u3 >> 16 | (u4 << 48); | ||
|
||
r->n[0] = r0; | ||
r->n[1] = r1; | ||
r->n[2] = r2; | ||
r->n[3] = r3; | ||
r->n[4] = r4; | ||
|
||
#ifdef VERIFY | ||
r->magnitude = 2; | ||
r->normalized = 0; | ||
secp256k1_fe_verify(r); | ||
#endif | ||
} | ||
|
||
static void secp256k1_fe_encode_62(int64_t *r, const secp256k1_fe *a) { | ||
|
||
const uint64_t M62 = UINT64_MAX >> 2; | ||
const uint64_t *n = &a->n[0]; | ||
uint64_t a0 = n[0], a1 = n[1], a2 = n[2], a3 = n[3], a4 = n[4]; | ||
|
||
#ifdef VERIFY | ||
VERIFY_CHECK(a->normalized); | ||
#endif | ||
|
||
r[0] = (a0 | a1 << 52) & M62; | ||
r[1] = (a1 >> 10 | a2 << 42) & M62; | ||
r[2] = (a2 >> 20 | a3 << 32) & M62; | ||
r[3] = (a3 >> 30 | a4 << 22) & M62; | ||
r[4] = a4 >> 40; | ||
} | ||
|
||
static int secp256k1_fe_divsteps_62(uint16_t eta, uint64_t f0, uint64_t g0, int64_t *t) { | ||
|
||
uint64_t u = -(uint64_t)1, v = 0, q = 0, r = -(uint64_t)1; | ||
uint64_t c1, c2, f = f0, g = g0, x, y, z; | ||
int i; | ||
|
||
for (i = 0; i < 62; ++i) { | ||
|
||
VERIFY_CHECK((u * f0 + v * g0) == -f << i); | ||
VERIFY_CHECK((q * f0 + r * g0) == -g << i); | ||
|
||
c1 = -(g & (eta >> 15)); | ||
|
||
x = (f ^ g) & c1; | ||
f ^= x; g ^= x; g ^= c1; g -= c1; | ||
|
||
y = (u ^ q) & c1; | ||
u ^= y; q ^= y; q ^= c1; q -= c1; | ||
|
||
z = (v ^ r) & c1; | ||
v ^= z; r ^= z; r ^= c1; r -= c1; | ||
|
||
eta = (eta ^ (uint16_t)c1) - (uint16_t)c1 - 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On a related note, seeing integer types smaller than There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Am I just missing a U i.e. 1U or else could you explain? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The way integer operations (and this includes bitwise as well as unary operators, shifts and comparisons) is done in C can be pretty unexpected for types smaller than For this line here, this means In general, if you ignore the theoretical cases like The real fuckup here is that the The typical trick to work around this is to start the arithmetic with an unsigned int value, e.g., [1] https://wiki.sei.cmu.edu/confluence/display/c/INT02-C.+Understand+integer+conversion+rules |
||
|
||
c2 = -(g & 1); | ||
|
||
g += (f & c2); g >>= 1; | ||
q += (u & c2); u <<= 1; | ||
r += (v & c2); v <<= 1; | ||
} | ||
|
||
t[0] = (int64_t)u; | ||
t[1] = (int64_t)v; | ||
t[2] = (int64_t)q; | ||
t[3] = (int64_t)r; | ||
|
||
return eta; | ||
} | ||
|
||
static void secp256k1_fe_update_fg(int len, int64_t *f, int64_t *g, int64_t *t) { | ||
|
||
const int64_t M62 = (int64_t)(UINT64_MAX >> 2); | ||
int64_t u = t[0], v = t[1], q = t[2], r = t[3], fi, gi; | ||
int128_t cf = 0, cg = 0; | ||
int i; | ||
|
||
VERIFY_CHECK(len > 0); | ||
|
||
fi = f[0]; | ||
gi = g[0]; | ||
|
||
cf -= (int128_t)u * fi + (int128_t)v * gi; | ||
cg -= (int128_t)q * fi + (int128_t)r * gi; | ||
|
||
VERIFY_CHECK(((int64_t)cf & M62) == 0); | ||
VERIFY_CHECK(((int64_t)cg & M62) == 0); | ||
|
||
cf >>= 62; | ||
cg >>= 62; | ||
|
||
for (i = 1; i < len; ++i) { | ||
|
||
fi = f[i]; | ||
gi = g[i]; | ||
|
||
cf -= (int128_t)u * fi + (int128_t)v * gi; | ||
cg -= (int128_t)q * fi + (int128_t)r * gi; | ||
|
||
f[i - 1] = (int64_t)cf & M62; cf >>= 62; | ||
g[i - 1] = (int64_t)cg & M62; cg >>= 62; | ||
} | ||
|
||
f[i - 1] = (int64_t)cf; | ||
g[i - 1] = (int64_t)cg; | ||
} | ||
|
||
static void secp256k1_fe_inv(secp256k1_fe *r, const secp256k1_fe *a) { | ||
|
||
#if 1 | ||
|
||
/* TODO Check for a == 0? */ | ||
|
||
int64_t t[12 * 4]; | ||
int64_t f[5] = { 0x3FFFFFFEFFFFFC2FLL, 0x3FFFFFFFFFFFFFFFLL, 0x3FFFFFFFFFFFFFFFLL, | ||
0x3FFFFFFFFFFFFFFFLL, 0xFFLL }; | ||
int64_t g[5]; | ||
secp256k1_fe b0, d0, a1, b1, c1, d1; | ||
int i, len, sign; | ||
int16_t eta; | ||
|
||
/* TODO 2^256 (mod p) is small, so it could be faster to multiply the input | ||
* by 2^768, and then the output by 2^24. */ | ||
/* Instead of dividing the output by 2^744, we scale the input. */ | ||
secp256k1_fe_mul(&b0, a, &SECP256K1_FE_TWO_POW_744); | ||
secp256k1_fe_normalize(&b0); | ||
secp256k1_fe_encode_62(&g[0], &b0); | ||
|
||
eta = -1; | ||
|
||
for (i = 0; i < 12; ++i) { | ||
eta = secp256k1_fe_divsteps_62(eta, f[0], g[0], &t[i * 4]); | ||
len = i <= 6 ? 5 : i >= 10 ? 1 : 11 - i; | ||
secp256k1_fe_update_fg(len, f, g, &t[i * 4]); | ||
} | ||
|
||
/* At this point, f must equal +/- 1 (the GCD). */ | ||
sign = (f[0] >> 1) & 1; | ||
|
||
for (i = 0; i < 3; ++i) { | ||
int tOff = i * 16; | ||
secp256k1_fe_combine_1s(&t[tOff + 0]); | ||
secp256k1_fe_combine_1s(&t[tOff + 8]); | ||
secp256k1_fe_combine_2s(&t[tOff + 0]); | ||
} | ||
|
||
/* secp256k1_fe_decode_matrix(&a0, &t[0]); */ | ||
secp256k1_fe_decode_matrix(&b0, &t[4]); | ||
/* secp256k1_fe_decode_matrix(&c0, &t[8]); */ | ||
secp256k1_fe_decode_matrix(&d0, &t[12]); | ||
|
||
secp256k1_fe_decode_matrix(&a1, &t[16]); | ||
secp256k1_fe_decode_matrix(&b1, &t[20]); | ||
secp256k1_fe_decode_matrix(&c1, &t[24]); | ||
secp256k1_fe_decode_matrix(&d1, &t[28]); | ||
|
||
secp256k1_fe_mul(&a1, &a1, &b0); | ||
secp256k1_fe_mul(&b1, &b1, &d0); | ||
secp256k1_fe_mul(&c1, &c1, &b0); | ||
secp256k1_fe_mul(&d1, &d1, &d0); | ||
|
||
b0 = a1; secp256k1_fe_add(&b0, &b1); | ||
d0 = c1; secp256k1_fe_add(&d0, &d1); | ||
|
||
secp256k1_fe_decode_matrix(&a1, &t[32]); | ||
secp256k1_fe_decode_matrix(&b1, &t[36]); | ||
/* secp256k1_fe_decode_matrix(&c1, &t[40]); */ | ||
/* secp256k1_fe_decode_matrix(&d1, &t[44]); */ | ||
|
||
secp256k1_fe_mul(&a1, &a1, &b0); | ||
secp256k1_fe_mul(&b1, &b1, &d0); | ||
/* secp256k1_fe_mul(&c1, &c1, &b0); */ | ||
/* secp256k1_fe_mul(&d1, &d1, &d0); */ | ||
|
||
b0 = a1; secp256k1_fe_add(&b0, &b1); | ||
/* d0 = c1; secp256k1_fe_add(&d0, &d1); */ | ||
|
||
secp256k1_fe_negate(&b1, &b0, 2); | ||
secp256k1_fe_cmov(&b0, &b1, sign); | ||
secp256k1_fe_normalize_weak(&b0); | ||
|
||
*r = b0; | ||
|
||
#else | ||
|
||
secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1; | ||
int j; | ||
|
||
/** The binary representation of (p - 2) has 5 blocks of 1s, with lengths in | ||
* { 1, 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block: | ||
* [1], [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223] | ||
*/ | ||
|
||
secp256k1_fe_sqr(&x2, a); | ||
secp256k1_fe_mul(&x2, &x2, a); | ||
|
||
secp256k1_fe_sqr(&x3, &x2); | ||
secp256k1_fe_mul(&x3, &x3, a); | ||
|
||
x6 = x3; | ||
for (j=0; j<3; j++) { | ||
secp256k1_fe_sqr(&x6, &x6); | ||
} | ||
secp256k1_fe_mul(&x6, &x6, &x3); | ||
|
||
x9 = x6; | ||
for (j=0; j<3; j++) { | ||
secp256k1_fe_sqr(&x9, &x9); | ||
} | ||
secp256k1_fe_mul(&x9, &x9, &x3); | ||
|
||
x11 = x9; | ||
for (j=0; j<2; j++) { | ||
secp256k1_fe_sqr(&x11, &x11); | ||
} | ||
secp256k1_fe_mul(&x11, &x11, &x2); | ||
|
||
x22 = x11; | ||
for (j=0; j<11; j++) { | ||
secp256k1_fe_sqr(&x22, &x22); | ||
} | ||
secp256k1_fe_mul(&x22, &x22, &x11); | ||
|
||
x44 = x22; | ||
for (j=0; j<22; j++) { | ||
secp256k1_fe_sqr(&x44, &x44); | ||
} | ||
secp256k1_fe_mul(&x44, &x44, &x22); | ||
|
||
x88 = x44; | ||
for (j=0; j<44; j++) { | ||
secp256k1_fe_sqr(&x88, &x88); | ||
} | ||
secp256k1_fe_mul(&x88, &x88, &x44); | ||
|
||
x176 = x88; | ||
for (j=0; j<88; j++) { | ||
secp256k1_fe_sqr(&x176, &x176); | ||
} | ||
secp256k1_fe_mul(&x176, &x176, &x88); | ||
|
||
x220 = x176; | ||
for (j=0; j<44; j++) { | ||
secp256k1_fe_sqr(&x220, &x220); | ||
} | ||
secp256k1_fe_mul(&x220, &x220, &x44); | ||
|
||
x223 = x220; | ||
for (j=0; j<3; j++) { | ||
secp256k1_fe_sqr(&x223, &x223); | ||
} | ||
secp256k1_fe_mul(&x223, &x223, &x3); | ||
|
||
/* The final result is then assembled using a sliding window over the blocks. */ | ||
|
||
t1 = x223; | ||
for (j=0; j<23; j++) { | ||
secp256k1_fe_sqr(&t1, &t1); | ||
} | ||
secp256k1_fe_mul(&t1, &t1, &x22); | ||
for (j=0; j<5; j++) { | ||
secp256k1_fe_sqr(&t1, &t1); | ||
} | ||
secp256k1_fe_mul(&t1, &t1, a); | ||
for (j=0; j<3; j++) { | ||
secp256k1_fe_sqr(&t1, &t1); | ||
} | ||
secp256k1_fe_mul(&t1, &t1, &x2); | ||
for (j=0; j<2; j++) { | ||
secp256k1_fe_sqr(&t1, &t1); | ||
} | ||
secp256k1_fe_mul(r, a, &t1); | ||
#endif | ||
} | ||
|
||
#endif /* SECP256K1_FIELD_REPR_IMPL_H */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just one thing I noticed when skimming through it: The types of
eta
are pretty inconsistent. You useuint16_t
inside the function, return anint
and then store it in anint16_t
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, it should be cleaned up. It's logically a signed value (~ in [-744,744]) in the sense that the algorithm as-written switches on the sign of it, but the actual constant-time code for _divsteps is in "bit-twiddling" style, and I wanted to make it clear that there's no reliance (in this method) on arithmetic right-shift. Conceptually maybe it's (u)int_fast16_t, but I was a bit lazy about dealing with the variable sizes before things worked. Possibly uint64_t (resp. uint32_t for 32bit) makes more sense.