From: Loup Vaillant Date: Sun, 4 Sep 2016 00:13:04 +0000 (+0200) Subject: removed duplicated code X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=abbf8ebadcdc469b91295dcc359cc1cd5c18a219;p=Monocypher.git removed duplicated code --- diff --git a/chacha20.c b/chacha20.c index 41b14f2..97b9c9d 100644 --- a/chacha20.c +++ b/chacha20.c @@ -55,28 +55,14 @@ increment_counter(crypto_chacha_ctx *ctx) /// Round functions /// /////////////////////// -// A quarter round is meant to mangle a fourth of a chacha context. -// (a line, a column, or any subset you can think of). Enough of -// these rounds, carefully chosen, will garble the input beyond -// recognition. + +// Mangles the chacha context into a random-looking output. // // WARNING: THIS OPERATION IS REVERSIBLE. -// -// If you build a crypto primitive on top of it without additional -// precautions, any rookie cryptanalist can break it. -#define QUARTERROUND(a, b, c, d) \ - a = a + b; d = rotl32(d ^ a, 16); \ - c = c + d; b = rotl32(b ^ c, 12); \ - a = a + b; d = rotl32(d ^ a, 8); \ - c = c + d; b = rotl32(b ^ c, 7) - -// Mangles the chacha context into a pseudorandom output -// That is, if you don't know the key stored in the context, -// you can't deduce squat about the output. This is true -// even if you know the output of many other contexes, even -// if they differ from this one by only one bit. +// We still need to take steps to ensure the attacker can't +// deduce the input (and with it, the key) from the output. static void -chacha20_rounds(uint8_t output[64], const crypto_chacha_ctx *ctx) +chacha20_rounds(uint32_t output[16], const crypto_chacha_ctx *ctx) { // Local variables instead of indices, to facilitate optimisations // TODO: test this shit. The speed increase might be small. @@ -104,6 +90,21 @@ chacha20_rounds(uint8_t output[64], const crypto_chacha_ctx *ctx) // 8 9 10 11 // 12 13 14 15 + // A quarter round is meant to mangle a fourth of a chacha context. 
+ // (a line, a column, or any subset you can think of). Enough of + // these rounds, carefully chosen, will garble the input beyond + // recognition. + // + // WARNING: THIS OPERATION IS REVERSIBLE. + // + // If you build a crypto primitive on top of it without additional + // precautions, any rookie cryptanalist can break it. +#define QUARTERROUND(a, b, c, d) \ + a = a + b; d = rotl32(d ^ a, 16); \ + c = c + d; b = rotl32(b ^ c, 12); \ + a = a + b; d = rotl32(d ^ a, 8); \ + c = c + d; b = rotl32(b ^ c, 7) + // Column round. Very SIMD friendly, if you want high performance. QUARTERROUND(x0, x4, x8, x12); // column 0 QUARTERROUND(x1, x5, x9, x13); // column 1 @@ -118,6 +119,30 @@ chacha20_rounds(uint8_t output[64], const crypto_chacha_ctx *ctx) QUARTERROUND(x3, x4, x9, x14); // diagonal 4 } + output[ 0] = x0; + output[ 1] = x1; + output[ 2] = x2; + output[ 3] = x3; + output[ 4] = x4; + output[ 5] = x5; + output[ 6] = x6; + output[ 7] = x7; + output[ 8] = x8; + output[ 9] = x9; + output[10] = x10; + output[11] = x11; + output[12] = x12; + output[13] = x13; + output[14] = x14; + output[15] = x15; +} + +static void +chacha20_block(uint8_t output[64], const crypto_chacha_ctx *ctx) +{ + uint32_t buffer[16]; + chacha20_rounds(buffer, ctx); + // Now our buffer is seriously garbled. However, it is still easy // to deduce the initial context from it: just invert the quarter // rounds and apply that in reverse order. @@ -143,77 +168,20 @@ chacha20_rounds(uint8_t output[64], const crypto_chacha_ctx *ctx) // the performance of naive implementations such as this one. With // SIMD, it's faster to just add the lot, so that's what the standard // does. 
- x0 += ctx->input[ 0]; - x1 += ctx->input[ 1]; - x2 += ctx->input[ 2]; - x3 += ctx->input[ 3]; - x4 += ctx->input[ 4]; - x5 += ctx->input[ 5]; - x6 += ctx->input[ 6]; - x7 += ctx->input[ 7]; - x8 += ctx->input[ 8]; - x9 += ctx->input[ 9]; - x10 += ctx->input[10]; - x11 += ctx->input[11]; - x12 += ctx->input[12]; - x13 += ctx->input[13]; - x14 += ctx->input[14]; - x15 += ctx->input[15]; - - // finally, we can output our buffer - store32_le(output + 0, x0 ); - store32_le(output + 4, x1 ); - store32_le(output + 8, x2 ); - store32_le(output + 12, x3 ); - store32_le(output + 16, x4 ); - store32_le(output + 20, x5 ); - store32_le(output + 24, x6 ); - store32_le(output + 28, x7 ); - store32_le(output + 32, x8 ); - store32_le(output + 36, x9 ); - store32_le(output + 40, x10); - store32_le(output + 44, x11); - store32_le(output + 48, x12); - store32_le(output + 52, x13); - store32_le(output + 56, x14); - store32_le(output + 60, x15); + for (unsigned i = 0; i < 16; i++) { + uint32_t sum = buffer[i] + ctx->input[i]; + store32_le(output + i*4, sum); + } } -// This one is the same as chacha20_rounds, only it gives you only +// This one is the same as chacha20_block, only it gives you only // half the output (256 bytes). It's basically the same as HSalsa20, // except build on ChaCha. 
It is provably as secure as ChaCha20 static void -half_chacha20_rounds(uint32_t output[8], const crypto_chacha_ctx *ctx) +half_chacha20_block(uint32_t output[8], const crypto_chacha_ctx *ctx) { - // Copy pasta rom chacha20_rounds - uint32_t x0 = ctx->input[ 0]; - uint32_t x1 = ctx->input[ 1]; - uint32_t x2 = ctx->input[ 2]; - uint32_t x3 = ctx->input[ 3]; - uint32_t x4 = ctx->input[ 4]; - uint32_t x5 = ctx->input[ 5]; - uint32_t x6 = ctx->input[ 6]; - uint32_t x7 = ctx->input[ 7]; - uint32_t x8 = ctx->input[ 8]; - uint32_t x9 = ctx->input[ 9]; - uint32_t x10 = ctx->input[10]; - uint32_t x11 = ctx->input[11]; - uint32_t x12 = ctx->input[12]; - uint32_t x13 = ctx->input[13]; - uint32_t x14 = ctx->input[14]; - uint32_t x15 = ctx->input[15]; - - // Copy pasta rom chacha20_rounds - for (int i = 20; i > 0; i -= 2) { - QUARTERROUND(x0, x4, x8, x12); // column 0 - QUARTERROUND(x1, x5, x9, x13); // column 1 - QUARTERROUND(x2, x6, x10, x14); // column 2 - QUARTERROUND(x3, x7, x11, x15); // column 3 - QUARTERROUND(x0, x5, x10, x15); // diagonal 1 - QUARTERROUND(x1, x6, x11, x12); // diagonal 2 - QUARTERROUND(x2, x7, x8, x13); // diagonal 3 - QUARTERROUND(x3, x4, x9, x14); // diagonal 4 - } + uint32_t buffer[16]; + chacha20_rounds(buffer, ctx); // Okay, remember about needing that addition? Well, we only // Disclose half of the output, and that ensures the attacker @@ -228,14 +196,8 @@ half_chacha20_rounds(uint32_t output[8], const crypto_chacha_ctx *ctx) // // This lets us avoid a couple additional loads and additions, // for even moar speed. 
- output[0] = ctx->input[ 0]; // don't add the constant - output[1] = ctx->input[ 1]; // don't add the constant - output[2] = ctx->input[ 2]; // don't add the constant - output[3] = ctx->input[ 3]; // don't add the constant - output[4] = ctx->input[12]; // don't add the counter - output[5] = ctx->input[13]; // don't add the counter - output[6] = ctx->input[14]; // don't add the nonce - output[7] = ctx->input[15]; // don't add the nonce + memcpy(output, buffer , sizeof(uint32_t) * 4); + memcpy(output + 4, buffer + 12, sizeof(uint32_t) * 4); } ////////////////////////////// @@ -336,7 +298,7 @@ init_Xchacha20(crypto_chacha_ctx *ctx, init_constant(ctx ); init_ctr (ctx, ctr ); init_nonce (ctx, nonce + 16); - half_chacha20_rounds(ctx->input + 5, &init_ctx); // init derived key + half_chacha20_block(ctx->input + 5, &init_ctx); // init derived key } static void @@ -348,7 +310,7 @@ encrypt_chacha20(crypto_chacha_ctx *ctx, size_t remaining_bytes = msg_length; for (;;) { uint8_t random_block[64]; - chacha20_rounds(random_block, ctx); + chacha20_block(random_block, ctx); increment_counter(ctx); // the only modification of the context // XOR the last pseudo-random block with the input, @@ -406,7 +368,7 @@ crypto_block_chacha20(const uint8_t key[32], { crypto_chacha_ctx ctx; init_chacha20(&ctx, key, nonce, ctr); - chacha20_rounds(output, &ctx); + chacha20_block(output, &ctx); } void @@ -417,7 +379,7 @@ crypto_block_Xchacha20(const uint8_t key[32], { crypto_chacha_ctx ctx; init_Xchacha20(&ctx, key, nonce, ctr); - chacha20_rounds(output, &ctx); + chacha20_block(output, &ctx); } /////////////////////////////// @@ -451,14 +413,14 @@ crypto_random_bytes(crypto_rng_context *ctx, // fill the output stream block by block while (nb_bytes >= 64) { - chacha20_rounds(out, &ctx->chacha_ctx); + chacha20_block(out, &ctx->chacha_ctx); increment_counter(&ctx->chacha_ctx); out += 64; nb_bytes -= 64; } // Generate one last block and finish this - chacha20_rounds(ctx->reminder, &ctx->chacha_ctx); // 
there was no reminder + chacha20_block(ctx->reminder, &ctx->chacha_ctx); // there was no reminder increment_counter(&ctx->chacha_ctx); memcpy(out, ctx->reminder, nb_bytes); // those two lines work even ctx->remaining_bytes = 64 - nb_bytes; // when nb_bytes is already 0