/// Round functions ///
///////////////////////
-// A quarter round is meant to mangle a fourth of a chacha context.
-// (a line, a column, or any subset you can think of). Enough of
-// these rounds, carefully chosen, will garble the input beyond
-// recognition.
+
+// Mangles the chacha context into a random-looking output.
//
// WARNING: THIS OPERATION IS REVERSIBLE.
-//
-// If you build a crypto primitive on top of it without additional
-// precautions, any rookie cryptanalist can break it.
-#define QUARTERROUND(a, b, c, d) \
- a = a + b; d = rotl32(d ^ a, 16); \
- c = c + d; b = rotl32(b ^ c, 12); \
- a = a + b; d = rotl32(d ^ a, 8); \
- c = c + d; b = rotl32(b ^ c, 7)
-
-// Mangles the chacha context into a pseudorandom output
-// That is, if you don't know the key stored in the context,
-// you can't deduce squat about the output. This is true
-// even if you know the output of many other contexes, even
-// if they differ from this one by only one bit.
+// We still need to take steps to ensure the attacker can't
+// deduce the input (and with it, the key) from the output.
static void
-chacha20_rounds(uint8_t output[64], const crypto_chacha_ctx *ctx)
+chacha20_rounds(uint32_t output[16], const crypto_chacha_ctx *ctx)
{
// Local variables instead of indices, to facilitate optimisations
// TODO: test this shit. The speed increase might be small.
// 8 9 10 11
// 12 13 14 15
+ // A quarter round is meant to mangle a fourth of a chacha context.
+ // (a line, a column, or any subset you can think of). Enough of
+ // these rounds, carefully chosen, will garble the input beyond
+ // recognition.
+ //
+ // WARNING: THIS OPERATION IS REVERSIBLE.
+ //
+// If you build a crypto primitive on top of it without additional
+// precautions, any rookie cryptanalyst can break it.
+#define QUARTERROUND(a, b, c, d) \
+ a = a + b; d = rotl32(d ^ a, 16); \
+ c = c + d; b = rotl32(b ^ c, 12); \
+ a = a + b; d = rotl32(d ^ a, 8); \
+ c = c + d; b = rotl32(b ^ c, 7)
+
// Column round. Very SIMD friendly, if you want high performance.
QUARTERROUND(x0, x4, x8, x12); // column 0
QUARTERROUND(x1, x5, x9, x13); // column 1
QUARTERROUND(x3, x4, x9, x14); // diagonal 4
}
+ output[ 0] = x0;
+ output[ 1] = x1;
+ output[ 2] = x2;
+ output[ 3] = x3;
+ output[ 4] = x4;
+ output[ 5] = x5;
+ output[ 6] = x6;
+ output[ 7] = x7;
+ output[ 8] = x8;
+ output[ 9] = x9;
+ output[10] = x10;
+ output[11] = x11;
+ output[12] = x12;
+ output[13] = x13;
+ output[14] = x14;
+ output[15] = x15;
+}
+
+// Computes one 64-byte block of ChaCha20 keystream from the context.
+//
+// The feed-forward addition of ctx->input below is what makes this
+// one-way: the rounds alone are reversible (see chacha20_rounds).
+static void
+chacha20_block(uint8_t output[64], const crypto_chacha_ctx *ctx)
+{
+    uint32_t buffer[16];
+    chacha20_rounds(buffer, ctx);
+
	// Now our buffer is seriously garbled. However, it is still easy
	// to deduce the initial context from it: just invert the quarter
	// rounds and apply that in reverse order.
	// the performance of naive implementations such as this one. With
	// SIMD, it's faster to just add the lot, so that's what the standard
	// does.
-	x0  += ctx->input[ 0];
-	x1  += ctx->input[ 1];
-	x2  += ctx->input[ 2];
-	x3  += ctx->input[ 3];
-	x4  += ctx->input[ 4];
-	x5  += ctx->input[ 5];
-	x6  += ctx->input[ 6];
-	x7  += ctx->input[ 7];
-	x8  += ctx->input[ 8];
-	x9  += ctx->input[ 9];
-	x10 += ctx->input[10];
-	x11 += ctx->input[11];
-	x12 += ctx->input[12];
-	x13 += ctx->input[13];
-	x14 += ctx->input[14];
-	x15 += ctx->input[15];
-
-	// finally, we can output our buffer
-	store32_le(output +  0, x0 );
-	store32_le(output +  4, x1 );
-	store32_le(output +  8, x2 );
-	store32_le(output + 12, x3 );
-	store32_le(output + 16, x4 );
-	store32_le(output + 20, x5 );
-	store32_le(output + 24, x6 );
-	store32_le(output + 28, x7 );
-	store32_le(output + 32, x8 );
-	store32_le(output + 36, x9 );
-	store32_le(output + 40, x10);
-	store32_le(output + 44, x11);
-	store32_le(output + 48, x12);
-	store32_le(output + 52, x13);
-	store32_le(output + 56, x14);
-	store32_le(output + 60, x15);
+    // Feed-forward, then serialise each word little-endian.
+    for (unsigned i = 0; i < 16; i++) {
+        uint32_t sum = buffer[i] + ctx->input[i];
+        store32_le(output + i*4, sum);
+    }
}
-// This one is the same as chacha20_rounds, only it gives you only
+// This one is the same as chacha20_block, only it gives you only
-// half the output (256 bytes). It's basically the same as HSalsa20,
-// except build on ChaCha. It is provably as secure as ChaCha20
+// half the output (32 bytes). It's basically the same as HSalsa20,
+// except built on ChaCha. It is provably as secure as ChaCha20.
static void
-half_chacha20_rounds(uint32_t output[8], const crypto_chacha_ctx *ctx)
+half_chacha20_block(uint32_t output[8], const crypto_chacha_ctx *ctx)
{
-	// Copy pasta rom chacha20_rounds
-	uint32_t x0  = ctx->input[ 0];
-	uint32_t x1  = ctx->input[ 1];
-	uint32_t x2  = ctx->input[ 2];
-	uint32_t x3  = ctx->input[ 3];
-	uint32_t x4  = ctx->input[ 4];
-	uint32_t x5  = ctx->input[ 5];
-	uint32_t x6  = ctx->input[ 6];
-	uint32_t x7  = ctx->input[ 7];
-	uint32_t x8  = ctx->input[ 8];
-	uint32_t x9  = ctx->input[ 9];
-	uint32_t x10 = ctx->input[10];
-	uint32_t x11 = ctx->input[11];
-	uint32_t x12 = ctx->input[12];
-	uint32_t x13 = ctx->input[13];
-	uint32_t x14 = ctx->input[14];
-	uint32_t x15 = ctx->input[15];
-
-	// Copy pasta rom chacha20_rounds
-	for (int i = 20; i > 0; i -= 2) {
-		QUARTERROUND(x0, x4, x8 , x12); // column 0
-		QUARTERROUND(x1, x5, x9 , x13); // column 1
-		QUARTERROUND(x2, x6, x10, x14); // column 2
-		QUARTERROUND(x3, x7, x11, x15); // column 3
-		QUARTERROUND(x0, x5, x10, x15); // diagonal 1
-		QUARTERROUND(x1, x6, x11, x12); // diagonal 2
-		QUARTERROUND(x2, x7, x8 , x13); // diagonal 3
-		QUARTERROUND(x3, x4, x9 , x14); // diagonal 4
-	}
+    uint32_t buffer[16];
+    chacha20_rounds(buffer, ctx);
	// Okay, remember about needing that addition? Well, we only
	// Disclose half of the output, and that ensures the attacker
	//
	// This lets us avoid a couple additional loads and additions,
	// for even moar speed.
-	output[0] = ctx->input[ 0]; // don't add the constant
-	output[1] = ctx->input[ 1]; // don't add the constant
-	output[2] = ctx->input[ 2]; // don't add the constant
-	output[3] = ctx->input[ 3]; // don't add the constant
-	output[4] = ctx->input[12]; // don't add the counter
-	output[5] = ctx->input[13]; // don't add the counter
-	output[6] = ctx->input[14]; // don't add the nonce
-	output[7] = ctx->input[15]; // don't add the nonce
+    // HChaCha20 output: post-rounds state words 0-3 go in output[0..3],
+    // words 12-15 go in output[4..7].  No feed-forward addition.
+    memcpy(output    , buffer     , sizeof(uint32_t) * 4);
+    memcpy(output + 4, buffer + 12, sizeof(uint32_t) * 4);
}
//////////////////////////////
init_constant(ctx );
init_ctr (ctx, ctr );
init_nonce (ctx, nonce + 16);
- half_chacha20_rounds(ctx->input + 5, &init_ctx); // init derived key
+ half_chacha20_block(ctx->input + 5, &init_ctx); // init derived key
}
static void
size_t remaining_bytes = msg_length;
for (;;) {
uint8_t random_block[64];
- chacha20_rounds(random_block, ctx);
+ chacha20_block(random_block, ctx);
increment_counter(ctx); // the only modification of the context
// XOR the last pseudo-random block with the input,
{
crypto_chacha_ctx ctx;
init_chacha20(&ctx, key, nonce, ctr);
- chacha20_rounds(output, &ctx);
+ chacha20_block(output, &ctx);
}
void
{
crypto_chacha_ctx ctx;
init_Xchacha20(&ctx, key, nonce, ctr);
- chacha20_rounds(output, &ctx);
+ chacha20_block(output, &ctx);
}
///////////////////////////////
// fill the output stream block by block
while (nb_bytes >= 64) {
- chacha20_rounds(out, &ctx->chacha_ctx);
+ chacha20_block(out, &ctx->chacha_ctx);
increment_counter(&ctx->chacha_ctx);
out += 64;
nb_bytes -= 64;
}
// Generate one last block and finish this
- chacha20_rounds(ctx->reminder, &ctx->chacha_ctx); // there was no reminder
+ chacha20_block(ctx->reminder, &ctx->chacha_ctx); // there was no reminder
increment_counter(&ctx->chacha_ctx);
memcpy(out, ctx->reminder, nb_bytes); // those two lines work even
ctx->remaining_bytes = 64 - nb_bytes; // when nb_bytes is already 0