From: Loup Vaillant
Date: Wed, 18 Jul 2018 19:08:40 +0000 (+0200)
Subject: Unsigned sliding windows for EdDSA verification
X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=2b8f447792c3e94f9be263c206a2ea0fbadf99da;p=Monocypher.git

Unsigned sliding windows for EdDSA verification

Reduces the number of additions in ge_double_scalarmult_vartime().
Verification is now 80% as fast as signing (in naive implementations,
it's only 50% as fast).

It could be even faster, but it's probably not worth the trouble:

- We could precompute the lookup table for the base point instead of
  constructing a cache. This would save about 8 point additions total,
  at the cost of 64 lines of code just to lay out the 320 precomputed
  constants.

- We could use special, cheaper additions for the precomputed base
  point, at the cost of an additional addition function.

- We could use *signed* sliding windows to further reduce the number of
  additions, at the cost of an additional point subtraction function
  (two if combined with special additions for the base point). Besides,
  I don't understand how they work.

The low-hanging fruit has been taken. Signature verification is faster
than ever before. This is good enough.
---

diff --git a/src/monocypher.c b/src/monocypher.c
index a1f06f2..877bc3b 100644
--- a/src/monocypher.c
+++ b/src/monocypher.c
@@ -24,6 +24,7 @@
 #define WIPE_BUFFER(buffer) crypto_wipe(buffer, sizeof(buffer))
 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
 #define ALIGN(x, block_size) ((~(x) + 1) & ((block_size) - 1))
+typedef int8_t i8;
 typedef uint8_t u8;
 typedef uint32_t u32;
 typedef int32_t i32;
@@ -1473,6 +1474,44 @@ static void ge_double(ge *s, const ge *p)
     // Never used to process secrets.
No need to wipe
 }
 
+// Compute lookup indices for unsigned sliding windows
+static void slide(i8 adds[256], const u8 scalar[32])
+{
+    FOR (i, 0, 256) {
+        adds[i] = -1;
+    }
+    int i = 0;
+    while (i < 253) {
+        if (scalar_bit(scalar, i) != 0) {
+            adds[i] = scalar_bit(scalar, i+1)
+                | scalar_bit(scalar, i+2) << 1
+                | scalar_bit(scalar, i+3) << 2;
+            i += 3;
+        }
+        i++;
+    }
+    // Skip last zeroes
+    while (i < 256 && scalar_bit(scalar, i) == 0) {
+        i++;
+    }
+    // last lookup (if any)
+    if (i < 256) {
+        adds[i] = scalar[31] >> (i - 247);;
+    }
+}
+
+// Look up table for sliding windows
+static void ge_precompute(ge_cached lut[8], const ge *P1)
+{
+    ge P2, tmp;
+    ge_double(&P2, P1);
+    ge_cache(&lut[0], P1);
+    FOR (i, 0, 7) {
+        ge_add(&tmp, &P2, &lut[i]);
+        ge_cache(&lut[i+1], &tmp);
+    }
+}
+
 // Variable time! P, sP, and sB must not be secret!
 static void ge_double_scalarmult_vartime(ge *sum, const ge *P,
                                          u8 p[32], u8 b[32])
@@ -1488,9 +1527,10 @@ static void ge_double_scalarmult_vartime(ge *sum, const ge *P,
     ge_from_xy(&B, X, Y);
 
     // cached points for addition
-    ge_cached cB, cP;
-    ge_cache(&cB, &B);
-    ge_cache(&cP, P);
+    ge_cached cP[8]; ge_precompute(cP, P);
+    ge_cached cB[8]; ge_precompute(cB, &B);
+    i8 p_adds[256]; slide(p_adds, p);
+    i8 b_adds[256]; slide(b_adds, b);
 
     // sum starts at zero
     fe_0(sum->X);
@@ -1501,8 +1541,8 @@ static void ge_double_scalarmult_vartime(ge *sum, const ge *P,
     // Merged double and add ladder
     for (int i = 255; i >= 0; i--) {
         ge_double(sum, sum);
-        if (scalar_bit(p, i)) { ge_add(sum, sum, &cP); }
-        if (scalar_bit(b, i)) { ge_add(sum, sum, &cB); }
+        if (p_adds[i] != -1) { ge_add(sum, sum, &cP[p_adds[i]]); }
+        if (b_adds[i] != -1) { ge_add(sum, sum, &cB[b_adds[i]]); }
     }
 }
 