From cbe6f426720a84b654df485721cf143c5ed9f3b3 Mon Sep 17 00:00:00 2001 From: Loup Vaillant Date: Sun, 9 Apr 2017 17:22:12 +0200 Subject: [PATCH] made carry propagation more platform independent --- monocypher.c | 71 ++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/monocypher.c b/monocypher.c index 92e593c..bcd37cf 100644 --- a/monocypher.c +++ b/monocypher.c @@ -191,10 +191,9 @@ void crypto_chacha20_encrypt(crypto_chacha_ctx *ctx, } void crypto_chacha20_stream(crypto_chacha_ctx *ctx, - u8 *cipher_text, - size_t message_size) + uint8_t *stream, size_t size) { - crypto_chacha20_encrypt(ctx, cipher_text, 0, message_size); + crypto_chacha20_encrypt(ctx, stream, 0, size); } @@ -797,16 +796,16 @@ static u32 load24_le(const u8 s[3]) sv fe_carry(fe h, i64 t[10]) { i64 c0, c1, c2, c3, c4, c5, c6, c7, c8, c9; - c9 = (t[9] + (i64) (1<<24)) >> 25; t[0] += c9 * 19; t[9] -= (u64)c9 << 25; - c1 = (t[1] + (i64) (1<<24)) >> 25; t[2] += c1; t[1] -= (u64)c1 << 25; - c3 = (t[3] + (i64) (1<<24)) >> 25; t[4] += c3; t[3] -= (u64)c3 << 25; - c5 = (t[5] + (i64) (1<<24)) >> 25; t[6] += c5; t[5] -= (u64)c5 << 25; - c7 = (t[7] + (i64) (1<<24)) >> 25; t[8] += c7; t[7] -= (u64)c7 << 25; - c0 = (t[0] + (i64) (1<<25)) >> 26; t[1] += c0; t[0] -= (u64)c0 << 26; - c2 = (t[2] + (i64) (1<<25)) >> 26; t[3] += c2; t[2] -= (u64)c2 << 26; - c4 = (t[4] + (i64) (1<<25)) >> 26; t[5] += c4; t[4] -= (u64)c4 << 26; - c6 = (t[6] + (i64) (1<<25)) >> 26; t[7] += c6; t[6] -= (u64)c6 << 26; - c8 = (t[8] + (i64) (1<<25)) >> 26; t[9] += c8; t[8] -= (u64)c8 << 26; + c9 = (t[9] + (i64) (1<<24)) >> 25; t[0] += c9 * 19; t[9] -= c9 * (1 << 25); + c1 = (t[1] + (i64) (1<<24)) >> 25; t[2] += c1; t[1] -= c1 * (1 << 25); + c3 = (t[3] + (i64) (1<<24)) >> 25; t[4] += c3; t[3] -= c3 * (1 << 25); + c5 = (t[5] + (i64) (1<<24)) >> 25; t[6] += c5; t[5] -= c5 * (1 << 25); + c7 = (t[7] + (i64) (1<<24)) >> 25; t[8] += c7; t[7] -= c7 * (1 << 25); + c0 = (t[0] + (i64) (1<<25)) >> 26; t[1] += c0; t[0] -= c0 * (1 << 26); + c2 = (t[2] + (i64) (1<<25)) >> 26; t[3] += c2; t[2] -= c2 * (1 << 26); + c4 = (t[4] + (i64) (1<<25)) >> 26; t[5] += c4; t[4] -= c4 * (1 << 26); + c6 = (t[6] + (i64) (1<<25)) >> 26; t[7] += c6; t[6] -= c6 * (1 << 26); + c8 = (t[8] + (i64) (1<<25)) >> 26; t[9] += c8; t[8] -= c8 * (1 << 26); FOR (i, 0, 10) { h[i] = t[i]; } } @@ -868,18 +867,18 @@ sv fe_mul(fe h, const fe f, const fe g) + f5*(i64)g4 + f6*(i64)g3 + f7*(i64)g2 + f8*(i64)g1 + f9*(i64)g0; i64 c0, c1, c2, c3, c4, c5, c6, c7, c8, c9; - c0 = (h0 + (i64) (1<<25)) >> 26; h1 += c0; h0 -= (u64)c0 << 26; - c4 = (h4 + (i64) (1<<25)) >> 26; h5 += c4; h4 -= (u64)c4 << 26; - c1 = (h1 + (i64) (1<<24)) >> 25; h2 += c1; h1 -= (u64)c1 << 25; - c5 = (h5 + (i64) (1<<24)) >> 25; h6 += c5; h5 -= (u64)c5 << 25; - c2 = (h2 + (i64) (1<<25)) >> 26; h3 += c2; h2 -= (u64)c2 << 26; - c6 = (h6 + (i64) (1<<25)) >> 26; h7 += c6; h6 -= (u64)c6 << 26; - c3 = (h3 + (i64) (1<<24)) >> 25; h4 += c3; h3 -= (u64)c3 << 25; - c7 = (h7 + (i64) (1<<24)) >> 25; h8 += c7; h7 -= (u64)c7 << 25; - c4 = (h4 + (i64) (1<<25)) >> 26; h5 += c4; h4 -= (u64)c4 << 26; - c8 = (h8 + (i64) (1<<25)) >> 26; h9 += c8; h8 -= (u64)c8 << 26; - c9 = (h9 + (i64) (1<<24)) >> 25; h0 += c9 * 19; h9 -= (u64)c9 << 25; - c0 = (h0 + (i64) (1<<25)) >> 26; h1 += c0; h0 -= (u64)c0 << 26; + c0 = (h0 + (i64) (1<<25)) >> 26; h1 += c0; h0 -= c0 * (1 << 26); + c4 = (h4 + (i64) (1<<25)) >> 26; h5 += c4; h4 -= c4 * (1 << 26); + c1 = (h1 + (i64) (1<<24)) >> 25; h2 += c1; h1 -= c1 * (1 << 25); + c5 = (h5 + (i64) (1<<24)) >> 25; h6 += c5; h5 -= c5 * (1 << 25); + c2 = (h2 + (i64) (1<<25)) >> 26; h3 += c2; h2 -= c2 * (1 << 26); + c6 = (h6 + (i64) (1<<25)) >> 26; h7 += c6; h6 -= c6 * (1 << 26); + c3 = (h3 + (i64) (1<<24)) >> 25; h4 += c3; h3 -= c3 * (1 << 25); + c7 = (h7 + (i64) (1<<24)) >> 25; h8 += c7; h7 -= c7 * (1 << 25); + c4 = (h4 + (i64) (1<<25)) >> 26; h5 += c4; h4 -= c4 * (1 << 26); + c8 = (h8 + (i64) (1<<25)) >> 26; h9 += c8; h8 -= c8 * (1 << 26); + c9 = (h9 + (i64) (1<<24)) >> 25; h0 += c9 * 19; h9 -= c9 * (1 << 25); + c0 = (h0 + (i64) (1<<25)) >> 26; h1 += c0; h0 -= c0 * (1 << 26); h[0] = h0; h[1] = h1; h[2] = h2; h[3] = h3; h[4] = h4; h[5] = h5; h[6] = h6; h[7] = h7; h[8] = h8; h[9] = h9; @@ -915,16 +914,16 @@ sv fe_tobytes(u8 s[32], const fe h) } t[0] += 19 * q; - i32 c0 = t[0] >> 26; t[1] += c0; t[0] -= (u64)c0 << 26; - i32 c1 = t[1] >> 25; t[2] += c1; t[1] -= (u64)c1 << 25; - i32 c2 = t[2] >> 26; t[3] += c2; t[2] -= (u64)c2 << 26; - i32 c3 = t[3] >> 25; t[4] += c3; t[3] -= (u64)c3 << 25; - i32 c4 = t[4] >> 26; t[5] += c4; t[4] -= (u64)c4 << 26; - i32 c5 = t[5] >> 25; t[6] += c5; t[5] -= (u64)c5 << 25; - i32 c6 = t[6] >> 26; t[7] += c6; t[6] -= (u64)c6 << 26; - i32 c7 = t[7] >> 25; t[8] += c7; t[7] -= (u64)c7 << 25; - i32 c8 = t[8] >> 26; t[9] += c8; t[8] -= (u64)c8 << 26; - i32 c9 = t[9] >> 25; t[9] -= (u64)c9 << 25; + i32 c0 = t[0] >> 26; t[1] += c0; t[0] -= c0 * (1 << 26); + i32 c1 = t[1] >> 25; t[2] += c1; t[1] -= c1 * (1 << 25); + i32 c2 = t[2] >> 26; t[3] += c2; t[2] -= c2 * (1 << 26); + i32 c3 = t[3] >> 25; t[4] += c3; t[3] -= c3 * (1 << 25); + i32 c4 = t[4] >> 26; t[5] += c4; t[4] -= c4 * (1 << 26); + i32 c5 = t[5] >> 25; t[6] += c5; t[5] -= c5 * (1 << 25); + i32 c6 = t[6] >> 26; t[7] += c6; t[6] -= c6 * (1 << 26); + i32 c7 = t[7] >> 25; t[8] += c7; t[7] -= c7 * (1 << 25); + i32 c8 = t[8] >> 26; t[9] += c8; t[8] -= c8 * (1 << 26); + i32 c9 = t[9] >> 25; t[9] -= c9 * (1 << 25); store32_le(s + 0, ((u32)t[0] >> 0) | ((u32)t[1] << 26)); store32_le(s + 4, ((u32)t[1] >> 6) | ((u32)t[2] << 19)); @@ -1176,7 +1175,7 @@ sv modL(u8 *r, i64 x[64]) FOR (j, i-32, i-12) { x[j] += carry - 16 * x[i] * L[j - (i - 32)]; carry = (x[j] + 128) >> 8; - x[j] -= (u64)carry << 8; + x[j] -= carry * (1 << 8); } x[i-12] += carry; x[i] = 0; -- 2.47.3