From: Loup Vaillant Date: Sat, 22 May 2021 13:30:02 +0000 (+0200) Subject: Define fe_invert() in terms of invsqrt() X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=61ecab579f9581b4b4c5d3791cb6a45e2698d155;p=Monocypher.git Define fe_invert() in terms of invsqrt() Pros: - The code is a couple lines shorter. - Stack usage may be lowered a tiny little bit (we saved two temporary field elements). Cons: - Inversion is one field multiplication slower. Note that fully optimised inversion can be 6 field multiplications faster than the current version. It would cost quite a bit of code though (we'd need a dedicated addition chain). --- diff --git a/src/monocypher.c b/src/monocypher.c index 8b84787..2951003 100644 --- a/src/monocypher.c +++ b/src/monocypher.c @@ -1424,41 +1424,6 @@ static void fe_sq2(fe h, const fe f) fe_mul_small(h, h, 2); } -// This could be simplified, but it would be slower -static void fe_pow22523(fe out, const fe z) -{ - fe t0, t1, t2; - fe_sq(t0, z); - fe_sq(t1,t0); fe_sq(t1, t1); fe_mul(t1, z, t1); - fe_mul(t0, t0, t1); - fe_sq(t0, t0); fe_mul(t0, t1, t0); - fe_sq(t1, t0); FOR (i, 1, 5) fe_sq(t1, t1); fe_mul(t0, t1, t0); - fe_sq(t1, t0); FOR (i, 1, 10) fe_sq(t1, t1); fe_mul(t1, t1, t0); - fe_sq(t2, t1); FOR (i, 1, 20) fe_sq(t2, t2); fe_mul(t1, t2, t1); - fe_sq(t1, t1); FOR (i, 1, 10) fe_sq(t1, t1); fe_mul(t0, t1, t0); - fe_sq(t1, t0); FOR (i, 1, 50) fe_sq(t1, t1); fe_mul(t1, t1, t0); - fe_sq(t2, t1); FOR (i, 1, 100) fe_sq(t2, t2); fe_mul(t1, t2, t1); - fe_sq(t1, t1); FOR (i, 1, 50) fe_sq(t1, t1); fe_mul(t0, t1, t0); - fe_sq(t0, t0); FOR (i, 1, 2) fe_sq(t0, t0); fe_mul(out, t0, z); - WIPE_BUFFER(t0); - WIPE_BUFFER(t1); - WIPE_BUFFER(t2); -} - -// Inverting means multiplying by 2^255 - 21 -// 2^255 - 21 = (2^252 - 3) * 8 + 3 -// So we reuse the multiplication chain of fe_pow22523 -static void fe_invert(fe out, const fe z) -{ - fe tmp; - fe_pow22523(tmp, z); - // tmp2^8 * z^3 - fe_sq(tmp, tmp); // 0 - fe_sq(tmp, tmp); fe_mul(tmp, tmp, z); // 1 - fe_sq(tmp, tmp); fe_mul(out, tmp, z); // 1 - WIPE_BUFFER(tmp); -} - // Parity check. Returns 0 if even, 1 if odd static int fe_isodd(const fe f) { @@ -1485,7 +1450,7 @@ static int fe_isequal(const fe f, const fe g) // Inverse square root. // Returns true if x is a non zero square, false otherwise. // After the call: -// isr = sqrt(1/x) if x is non-zero square. +// isr = sqrt(1/x) if x is a non-zero square. // isr = sqrt(sqrt(-1)/x) if x is not a square. // isr = 0 if x is zero. // We do not guarantee the sign of the square root. @@ -1541,21 +1506,64 @@ static int fe_isequal(const fe f, const fe g) // x^((p-5)/8) * sqrt(-1) = -sqrt(sqrt(-1)/x) or sqrt(sqrt(-1)/x) static int invsqrt(fe isr, const fe x) { - fe check, quartic; - fe_copy(check, x); - fe_pow22523(isr, check); - fe_sq (quartic, isr); - fe_mul(quartic, quartic, check); + fe t0, t1, t2; + + // t0 = x^((p-5)/8) + // Can be achieved with a simple double & add ladder, + // but it would be slower. + fe_sq(t0, x); + fe_sq(t1,t0); fe_sq(t1, t1); fe_mul(t1, x, t1); + fe_mul(t0, t0, t1); + fe_sq(t0, t0); fe_mul(t0, t1, t0); + fe_sq(t1, t0); FOR (i, 1, 5) fe_sq(t1, t1); fe_mul(t0, t1, t0); + fe_sq(t1, t0); FOR (i, 1, 10) fe_sq(t1, t1); fe_mul(t1, t1, t0); + fe_sq(t2, t1); FOR (i, 1, 20) fe_sq(t2, t2); fe_mul(t1, t2, t1); + fe_sq(t1, t1); FOR (i, 1, 10) fe_sq(t1, t1); fe_mul(t0, t1, t0); + fe_sq(t1, t0); FOR (i, 1, 50) fe_sq(t1, t1); fe_mul(t1, t1, t0); + fe_sq(t2, t1); FOR (i, 1, 100) fe_sq(t2, t2); fe_mul(t1, t2, t1); + fe_sq(t1, t1); FOR (i, 1, 50) fe_sq(t1, t1); fe_mul(t0, t1, t0); + fe_sq(t0, t0); FOR (i, 1, 2) fe_sq(t0, t0); fe_mul(t0, t0, x); + + // quartic = x^((p-1)/4) + i32 *quartic = t1; + fe_sq (quartic, t0); + fe_mul(quartic, quartic, x); + + i32 *check = t2; fe_1 (check); int p1 = fe_isequal(quartic, check); fe_neg(check, check ); int m1 = fe_isequal(quartic, check); fe_neg(check, sqrtm1); int ms = fe_isequal(quartic, check); - fe_mul(check, isr, sqrtm1); - fe_ccopy(isr, check, m1 | ms); - WIPE_BUFFER(quartic); - WIPE_BUFFER(check); + + // if quartic == -1 or sqrt(-1) + // then isr = x^((p-1)/4) * sqrt(-1) + // else isr = x^((p-1)/4) + fe_mul(isr, t0, sqrtm1); + fe_ccopy(isr, t0, 1 - (m1 | ms)); + + WIPE_BUFFER(t0); + WIPE_BUFFER(t1); + WIPE_BUFFER(t2); return p1 | m1; } +// Inverse in terms of inverse square root. +// Requires two additional squarings to get rid of the sign. +// +// 1/x = x * (+invsqrt(x^2))^2 +// = x * (-invsqrt(x^2))^2 +// +// A fully optimised exponentiation by p-1 would save 6 field +// multiplications, but it would require more code. +static void fe_invert(fe out, const fe x) +{ + fe tmp; + fe_sq(tmp, x); + invsqrt(tmp, tmp); + fe_sq(tmp, tmp); + fe_mul(out, tmp, x); + WIPE_BUFFER(tmp); +} + // trim a scalar for scalar multiplication static void trim_scalar(u8 scalar[32]) {