From: Loup Vaillant Date: Fri, 3 Aug 2018 17:28:31 +0000 (+0200) Subject: Specialised adding code for EdDSA signatures X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=022e8c69681f5e63509baf308574f5200a8a4ba7;p=Monocypher.git Specialised adding code for EdDSA signatures - Saved one multiplication by assuming Z=1 - Hoisted wipes out of loops - Removed wipes for variable time additions This made both signatures and verification a bit faster. (Note: current signature verification speed is only 23% slower than key exchange. I didn't think it could be that fast.) --- diff --git a/src/monocypher.c b/src/monocypher.c index 0f552ce..6f7696c 100644 --- a/src/monocypher.c +++ b/src/monocypher.c @@ -1460,7 +1460,7 @@ static void ge_cache(ge_cached *c, const ge *p) static void ge_add(ge *s, const ge *p, const ge_cached *q) { - fe a, b; + fe a, b; // not used to process secrets, no need to wipe fe_add(a , p->Y, p->X ); fe_sub(b , p->Y, p->X ); fe_mul(a , a , q->Yp); @@ -1468,39 +1468,54 @@ static void ge_add(ge *s, const ge *p, const ge_cached *q) fe_add(s->Y, a , b ); fe_sub(s->X, a , b ); - fe_add(s->Z, p->Z, p->Z); + fe_add(s->Z, p->Z, p->Z ); fe_mul(s->Z, s->Z, q->Z ); fe_mul(s->T, p->T, q->T2); - fe_add(a , s->Z, s->T); - fe_sub(b , s->Z, s->T); + fe_add(a , s->Z, s->T ); + fe_sub(b , s->Z, s->T ); fe_mul(s->T, s->X, s->Y); fe_mul(s->X, s->X, b ); fe_mul(s->Y, s->Y, a ); fe_mul(s->Z, a , b ); +} + +static void ge_madd(ge *s, const ge *p, const ge_cached *q, fe a, fe b) +{ + fe_add(a , p->Y, p->X ); + fe_sub(b , p->Y, p->X ); + fe_mul(a , a , q->Yp); + fe_mul(b , b , q->Ym); + fe_add(s->Y, a , b ); + fe_sub(s->X, a , b ); + + fe_add(s->Z, p->Z, p->Z ); + fe_mul(s->T, p->T, q->T2); + fe_add(a , s->Z, s->T ); + fe_sub(b , s->Z, s->T ); - WIPE_BUFFER( a); - WIPE_BUFFER( b); + fe_mul(s->T, s->X, s->Y); + fe_mul(s->X, s->X, b ); + fe_mul(s->Y, s->Y, a ); + fe_mul(s->Z, a , b ); } -static void ge_double(ge *s, const ge *p) +static void ge_double(ge *s, const ge *p, ge *q) { - ge q; // intermediate point x=X/Z, y=Y/T - fe_sq (q.X , p->X); - fe_sq (q.Y , p->Y); - fe_sq2(q.Z , p->Z); - fe_add(q.T , p->X, p->Y); - fe_sq (s->T, q.T); - fe_add(q.T , q.Y , q.X); - fe_sub(q.Y , q.Y , q.X); - fe_sub(q.X , s->T, q.T); - fe_sub(q.Z , q.Z , q.Y); + fe_sq (q->X, p->X); + fe_sq (q->Y, p->Y); + fe_sq2(q->Z, p->Z); + fe_add(q->T, p->X, p->Y); + fe_sq (s->T, q->T); + fe_add(q->T, q->Y, q->X); + fe_sub(q->Y, q->Y, q->X); + fe_sub(q->X, s->T, q->T); + fe_sub(q->Z, q->Z, q->Y); - fe_mul(s->X, q.X , q.Z); - fe_mul(s->Y, q.T , q.Y); - fe_mul(s->Z, q.Y , q.Z); - fe_mul(s->T, q.X , q.T); - WIPE_CTX(&q); + fe_mul(s->X, q->X , q->Z); + fe_mul(s->Y, q->T , q->Y); + fe_mul(s->Z, q->Y , q->Z); + fe_mul(s->T, q->X , q->T); } // Compute lookup indices for unsigned sliding windows @@ -1533,7 +1548,7 @@ static void slide(i8 adds[256], const u8 scalar[32]) static void ge_precompute(ge_cached lut[8], const ge *P1) { ge P2, tmp; - ge_double(&P2, P1); + ge_double(&P2, P1, &tmp); ge_cache(&lut[0], P1); FOR (i, 0, 7) { ge_add(&tmp, &P2, &lut[i]); @@ -1639,7 +1654,7 @@ static void ge_double_scalarmult_vartime(ge *sum, const ge *P, // Merged double and add ladder ge_zero(sum); for (int i = 255; i >= 0; i--) { - ge_double(sum, sum); + ge_double(sum, sum, &B); // B is no longer used, we can overwrite it if (p_adds[i] != -1) { ge_add(sum, sum, &cP[p_adds[i]]); } if (b_adds[i] != -1) { ge_add(sum, sum, &cB[b_adds[i]]); } } @@ -1657,9 +1672,11 @@ static void ge_scalarmult_base(ge *p, const u8 scalar[32]) // Double and add ladder ge_cached tmp; + fe a, b; // temporaries for addition + ge dbl; // temporary for doublings ge_zero(p); for (int i = 63; i >= 0; i--) { - ge_double(p, p); + ge_double(p, p, &dbl); fe_1(tmp.Ym); fe_1(tmp.Yp); fe_1(tmp.Z ); fe_0(tmp.T2); u8 nibble = scalar_bit(scalar, i) @@ -1672,9 +1689,12 @@ static void ge_scalarmult_base(ge *p, const u8 scalar[32]) fe_ccopy(tmp.Yp, ccomb[i].Yp, select); fe_ccopy(tmp.T2, ccomb[i].T2, select); } - ge_add(p, p, &tmp); + ge_madd(p, p, &tmp, a, b); } WIPE_CTX(&tmp); + WIPE_CTX(&dbl); + WIPE_BUFFER(a); + WIPE_BUFFER(b); } static void modL(u8 *r, i64 x[64])