fixed argon2i, added tests (TODO: printf)

author Loup Vaillant <loup@loup-vaillant.fr>

Thu, 5 Jan 2017 16:27:26 +0000 (17:27 +0100)

committer Loup Vaillant <loup@loup-vaillant.fr>

Thu, 5 Jan 2017 16:27:26 +0000 (17:27 +0100)
author Loup Vaillant <loup@loup-vaillant.fr>
Thu, 5 Jan 2017 16:27:26 +0000 (17:27 +0100)
committer Loup Vaillant <loup@loup-vaillant.fr>
Thu, 5 Jan 2017 16:27:26 +0000 (17:27 +0100)
diff --git a/argon2i.c b/argon2i.c

index 05be2d94a4bcabcf20a5de9d0cb748681d56da1c..d61bbc0751b7183d44b782c88b267b2123147a3d 100644 (file)
--- a/argon2i.c
+++ b/argon2i.c
@@ -1,6 +1,11 @@
  #include "argon2i.h"
  #include "blake2b.h"
  
+// tests
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
  static uint64_t
  load64_le(const uint8_t s[8])
  {
@@ -55,14 +60,6 @@ typedef struct block {
      uint64_t a[128]; // 1024 octets in 128 64-bit words
  } block;
  
-static void
-zero_block(block *b)
-{
-    for (int i = 0; i < 128; i++) {
-        b->a[i] = 0;
-    }
-}
-
  static void
  load_block(block *b, const uint8_t bytes[1024])
  {
@@ -95,18 +92,23 @@ xor_block(block *out, const block *in)
      }
  }
  
+static void
+blake_update_32(crypto_blake2b_ctx *ctx, uint32_t input)
+{
+    uint8_t buf[4];
+    store32_le(buf, input);
+    crypto_blake2b_update(ctx, buf, 4);
+}
+
  static void
  extended_hash(uint8_t       *digest, uint32_t digest_size,
                const uint8_t *input , uint32_t input_size)
  {
      crypto_blake2b_ctx ctx;
-    uint8_t            buf[4];
-    store32_le(buf, digest_size);
-
-    crypto_general_blake2b_init(&ctx, min(digest_size, 64), 0, 0);
-    crypto_blake2b_update(&ctx, buf, 4);
-    crypto_blake2b_update(&ctx, input, input_size);
-    crypto_blake2b_final(&ctx, digest);
+    crypto_blake2b_general_init(&ctx, min(digest_size, 64), 0, 0);
+    blake_update_32            (&ctx, digest_size);
+    crypto_blake2b_update      (&ctx, input, input_size);
+    crypto_blake2b_final       (&ctx, digest);
  
      if (digest_size > 64) {
          // the conversion to u64 avoids integer overflow on
@@ -115,15 +117,16 @@ extended_hash(uint8_t       *digest, uint32_t digest_size,
          uint32_t i   =  1;
          uint32_t in  =  0;
          uint32_t out = 32;
-        while (i <= r) {
+        while (i < r) {
              // Input and output overlap.
              // This shouldn't be a problem.
-            crypto_blake2b(digest + out, input + in, 64);
+            crypto_blake2b(digest + out, digest + in, 64);
              i   +=  1;
              in  += 32;
              out += 32;
          }
-        crypto_general_blake2b(digest + out, digest_size - (32 * r), 0, 0,
+        crypto_general_blake2b(digest + out, digest_size - (32 * r),
+                               0, 0, // no key
                                 digest + in , 64);
      }
  }
@@ -172,16 +175,29 @@ g_rounds(block *work_block)
  static void
  binary_g(block *result, block *x, block *y, void (*xcopy) (block*, const block*))
  {
-    // puts R = X ^ Y into tmp
+    // put R = X ^ Y into tmp
      block tmp;
-    copy_block(&tmp  ,    x);
-    xor_block (&tmp  ,    y);
+    copy_block(&tmp, x);
+    xor_block (&tmp, y);
  
      xcopy(result, &tmp);     // save R (erase or xor the old block)
      g_rounds(&tmp);          // tmp = Z
      xor_block(result, &tmp); // result =  R ^ Z (or R ^ Z ^ old)
  }
  
+// applies the "two rounds compression function" on the input, in place
+static void
+g_square(block *work_block)
+{
+    // work_block == R
+    block tmp;
+    for (int i = 0; i < 2; i++) {
+        copy_block(&tmp, work_block); // tmp = R
+        g_rounds(work_block);         // work_block = Z
+        xor_block(work_block, &tmp);  // work_block = Z ^ R
+    }
+}
+
  typedef struct gidx_ctx {
      block    b;
      uint32_t pass_number;
@@ -189,9 +205,27 @@ typedef struct gidx_ctx {
      uint32_t nb_blocks;
      uint32_t nb_iterations;
      uint32_t ctr;
-    uint32_t next_index;
+    uint32_t index;
  } gidx_ctx;
  
+static void
+gidx_refresh(gidx_ctx *ctx)
+{
+    ctx->b.a[0] = ctx->pass_number;
+    ctx->b.a[1] = 0;  // lane number (we have only one)
+    ctx->b.a[2] = ctx->slice_number;
+    ctx->b.a[3] = ctx->nb_blocks;
+    ctx->b.a[4] = ctx->nb_iterations;
+    ctx->b.a[5] = 1;  // type: Argon2i
+    ctx->b.a[6] = ctx->ctr;
+    // zero the rest of the block
+    for (int i = 7; i < 128; i++) {
+        ctx->b.a[i] = 0;
+    }
+    // shuffle the block into something weakly pseudorandom
+    g_square(&(ctx->b));
+}
+
  static void
  gidx_init(gidx_ctx *ctx,
            uint32_t pass_number,
@@ -203,40 +237,50 @@ gidx_init(gidx_ctx *ctx,
      ctx->slice_number  = slice_number;
      ctx->nb_blocks     = nb_blocks;
      ctx->nb_iterations = nb_iterations;
-    ctx->ctr           = 0;   // first block starts by 1.
-    ctx->next_index    = 128; // will force increment of ctr upon gidx_next().
+    ctx->ctr           = 1;   // not zero, surprisingly
+    ctx->index         = pass_number == 0 && slice_number == 0 ? 2 : 0;
+    gidx_refresh(ctx);
  }
  
-static uint64_t
-gidx_next(gidx_ctx *ctx, uint32_t current_block)
+static uint32_t
+gidx_next(gidx_ctx *ctx)
  {
-    // lazily create the index block we need
-    if (ctx->next_index == 128) {
-        ctx->next_index = 0;
+    // lazily creates the index block we need
+    if (ctx->index == 128) {
+        ctx->index = 0;
          ctx->ctr++;
-        // refreshe the underlying block
-        zero_block(&(ctx->b));
-        ctx->b.a[0] = ctx->pass_number;
-        ctx->b.a[1] = 0;              // lane number (we have only one)
-        ctx->b.a[2] = ctx->slice_number;
-        ctx->b.a[3] = ctx->nb_blocks;
-        ctx->b.a[4] = ctx->nb_iterations;
-        ctx->b.a[5] = 1;              // type: Argon2i
-        ctx->b.a[6] = ctx->ctr;
-        g_rounds(&(ctx->b));
+        gidx_refresh(ctx);
      }
      // we don't need J2, because there's only one lane.
-    uint64_t j1 = ctx->b.a[ctx->next_index]; // 32 least significant bits
-    ctx->next_index++;
-
-    _Bool    first_pass = ctx->pass_number == 1; // first pass == 1, not zero
-    uint32_t lane_size  = ctx->nb_blocks;
-    uint32_t area_size  = first_pass ? current_block - 1 : lane_size - 2;
-    uint64_t x          = (j1 * j1)           >> 32;
-    uint64_t y          = (area_size * x) >> 32;
-    uint64_t z          = area_size - 1 - y;
-    uint32_t start_pos  = first_pass ? 0 : current_block + 1;
-    return (start_pos + z) % lane_size;
+    uint64_t j1 = ctx->b.a[ctx->index] & 0xffffffff;
+
+    // Computes the area size.
+    // Pass 0 : all already finished segments plus already constructed
+    //          blocks in this segment
+    // Pass 1+: 3 last segments plus already constructed
+    //          blocks in this segment
+    // THIS IS NOT WHAT THE SPEC SAYS.  HERE I COPY THE REFERENCE IMPLEMENTATION
+    //uint32_t area_size  = first_pass ? current_block - 1 : lane_size - 2;
+    _Bool    first_pass    = ctx->pass_number == 0;
+    uint32_t slice_size    = ctx->nb_blocks / 4;
+    uint32_t area_size     = ((first_pass ? ctx->slice_number : 3)
+                              * slice_size + ctx->index - 1);
+
+    uint32_t next_slice    = (ctx->slice_number == 3
+                              ? 0
+                              : (ctx->slice_number + 1) * slice_size);
+
+    // Generates the actual index from J1
+    uint64_t x             = (j1 * j1)       >> 32;
+    uint64_t y             = (area_size * x) >> 32;
+    uint64_t z             = area_size - 1 - y;
+    uint32_t start_pos     = first_pass ? 0 : next_slice;
+    printf("s%d_%d", start_pos, area_size);
+    uint32_t actual_pos    = (start_pos + z) % ctx->nb_blocks;
+
+    ctx->index++; // updates index for the next call
+
+    return actual_pos;
  }
  
  void
@@ -253,27 +297,71 @@ crypto_Argon2i_hash(uint8_t       *tag,       uint32_t tag_size,
      block *blocks = work_area;
  
      {
-        uint8_t buf[4];
          crypto_blake2b_ctx ctx;
          crypto_blake2b_init(&ctx);
-        store32_le(buf, 1    /* p */ ); crypto_blake2b_update(&ctx, buf, 4);
-        store32_le(buf, tag_size     ); crypto_blake2b_update(&ctx, buf, 4);
-        store32_le(buf, nb_blocks    ); crypto_blake2b_update(&ctx, buf, 4);
-        store32_le(buf, nb_iterations); crypto_blake2b_update(&ctx, buf, 4);
-        store32_le(buf, 0x13 /* v */ ); crypto_blake2b_update(&ctx, buf, 4);
-        store32_le(buf, 1    /* y */ ); crypto_blake2b_update(&ctx, buf, 4);
-        store32_le(buf, password_size); crypto_blake2b_update(&ctx, buf, 4);
+
+        blake_update_32      (&ctx, 1            ); // p: number of threads
+        blake_update_32      (&ctx, tag_size     );
+        blake_update_32      (&ctx, nb_blocks    );
+        blake_update_32      (&ctx, nb_iterations);
+        blake_update_32      (&ctx, 0x13         ); // v: version number
+        blake_update_32      (&ctx, 1            ); // y: Argon2i
+        blake_update_32      (&ctx,           password_size);
          crypto_blake2b_update(&ctx, password, password_size);
-        store32_le(buf, salt_size    ); crypto_blake2b_update(&ctx, buf, 4);
-        crypto_blake2b_update(&ctx, salt, salt_size);
-        store32_le(buf, key_size     ); crypto_blake2b_update(&ctx, buf, 4);
-        crypto_blake2b_update(&ctx, key, key_size);
-        store32_le(buf, ad_size      ); crypto_blake2b_update(&ctx, buf, 4);
-        crypto_blake2b_update(&ctx, ad, ad_size);
-
-        uint8_t initial_hash[72]; // 64 bytes plus additional words for future hashes
+        blake_update_32      (&ctx,           salt_size);
+        crypto_blake2b_update(&ctx, salt,     salt_size);
+        blake_update_32      (&ctx,           key_size);
+        crypto_blake2b_update(&ctx, key,      key_size);
+        blake_update_32      (&ctx,           ad_size);
+        crypto_blake2b_update(&ctx, ad,       ad_size);
+
+        uint8_t initial_hash[72]; // 64 bytes plus 2 words for future hashes
          crypto_blake2b_final(&ctx, initial_hash);
  
+        /* // debug stuff */
+        /* int input_size = 40 + password_size + salt_size + key_size + ad_size; */
+        /* int i          = 0; */
+        /* uint8_t *input      = malloc(input_size); */
+        /* store32_le(input + i, 4            ); i += 4; */
+        /* store32_le(input + i, tag_size     ); i += 4; */
+        /* store32_le(input + i, nb_blocks    ); i += 4; */
+        /* store32_le(input + i, nb_iterations); i += 4; */
+        /* store32_le(input + i, 0x13         ); i += 4; */
+        /* store32_le(input + i, 1            ); i += 4; */
+        /* store32_le(input + i, password_size); i += 4; */
+        /* memcpy    (input + i, password, password_size); i += password_size; */
+        /* store32_le(input + i,           salt_size    ); i += 4; */
+        /* memcpy    (input + i, salt,     salt_size    ); i += salt_size; */
+        /* store32_le(input + i,           key_size     ); i += 4; */
+        /* memcpy    (input + i, key,      key_size     ); i += key_size; */
+        /* store32_le(input + i,           ad_size      ); i += 4; */
+        /* memcpy    (input + i, ad,       ad_size      ); i += ad_size; */
+        /* printf("input_size, i: %d, %d", input_size, i); */
+        /* for (int i = 0; i < input_size; i++) { */
+        /*     if (i % 4 == 0) { */
+        /*         printf("\n"); */
+        /*     } */
+        /*     printf("%02x", input[i]); */
+        /* } */
+        /* printf("\n"); */
+        /* printf("Memory     : %d\n", nb_blocks    ); */
+        /* printf("Iterations : %d\n", nb_iterations); */
+        /* printf("Parallelism: %d\n", 4            ); */
+        /* printf("Tag length : %d\n", tag_size     ); */
+        /* printf("Pwd length : %d\n", password_size); */
+        /* printf("Slt length : %d\n", salt_size    ); */
+        /* printf("Key length : %d\n", key_size     ); */
+        /* printf("AD  length : %d\n", ad_size      ); */
+
+        printf("Pre-hashing digest:\n");
+        for (int i = 0; i < 8; i++) {
+            for (int j = 0; j < 8; j++) {
+                printf("%02x ", initial_hash[8*i + j]);
+            }
+            printf("\n");
+        }
+        // end debug stuff
+
          // fill first 2 blocks
          block   tmp_block;
          uint8_t hash_area[1024];
@@ -283,55 +371,54 @@ crypto_Argon2i_hash(uint8_t       *tag,       uint32_t tag_size,
          load_block(&tmp_block, hash_area);
          copy_block(blocks, &tmp_block);
  
-        store32_le(initial_hash + 68, 1); // slight modification
+        store32_le(initial_hash + 64, 1); // slight modification
          extended_hash(hash_area, 1024, initial_hash, 72);
          load_block(&tmp_block, hash_area);
-        xor_block(blocks + 1, &tmp_block);
+        copy_block(blocks + 1, &tmp_block);
      }
  
      // Actual number of blocks
      nb_blocks -= nb_blocks % 4; // round down to 4 p (p == 1 thread)
      const uint32_t segment_size = nb_blocks / 4;
  
-    // fill the rest of the first segment
-    {
-        gidx_ctx ctx;
-        gidx_init(&ctx, 1, 1, nb_blocks, nb_iterations);
-        for (uint32_t i = 2; i < segment_size; i++) {
-            binary_g(blocks + i,                  // current block
-                     blocks + i - 1,              // previous block
-                     blocks + gidx_next(&ctx, i), // reference block
-                     copy_block);                 // first pass is a raw copy
-        }
-    }
-    // fill the other 3 segments
-    for (int segment = 1; segment < 4; segment++ ) {
-        gidx_ctx ctx;
-        gidx_init(&ctx, 1, segment + 1, nb_blocks, nb_iterations);
-        for (uint32_t i = segment * segment_size;
-             i < (segment + 1) * segment_size;
-             i++) {
-            binary_g(blocks + i,                  // current block
-                     blocks + i - 1,              // previous block
-                     blocks + gidx_next(&ctx, i), // reference block
-                     copy_block);                 // first pass is a raw copy
-        }
-    }
+    // fill (then re-fill) the rest of the blocks
+    for (uint32_t pass_number = 0; pass_number < nb_iterations; pass_number++) {
+        _Bool     first_pass  = pass_number == 0;
+        // Simple copy on pass 0, XOR instead of overwrite on subsequent passes
+        void (*xcopy) (block*, const block*) = first_pass ?copy_block :xor_block;
+
+        for (int segment = 0; segment < 4; segment++ ) {
+
+            gidx_ctx ctx;
+            gidx_init(&ctx, pass_number, segment, nb_blocks, nb_iterations);
  
-    // subsequent iterations (xor computations with previous results)
-    for (int segment = 0; segment < 4; segment++ ) {
-        gidx_ctx ctx;
-        gidx_init(&ctx, 1, segment + 1, nb_blocks, nb_iterations);
-        for (uint32_t i = segment * segment_size;
-             i < (segment + 1) * segment_size;
-             i++) {
-            binary_g(blocks + i,                   // current block
-                     blocks + (i - 1) % nb_blocks, // previous block (modulo!)
-                     blocks + gidx_next(&ctx, i),  // reference block
-                     xor_block);                   // subsequent passes are XOR
+            // On the first segment of the first pass,
+            // blocks 0 and 1 are already filled.
+            // We use the offset to skip them.
+            uint32_t offset = first_pass && segment == 0 ? 2 : 0;
+            // current, reference, and previous are block indices
+            for (uint32_t current =  segment      * segment_size + offset;
+                 current          < (segment + 1) * segment_size;
+                 current++) {
+                uint32_t previous  = current == 0 ? nb_blocks - 1 : current - 1;
+                uint32_t reference = gidx_next(&ctx);
+                // debug stuff
+                printf("(%2d,%2d,%2d)   ", current, previous, reference);
+                // end debug stuff
+                binary_g(blocks + current,
+                         blocks + previous,
+                         blocks + reference,
+                         xcopy);
+            }
+            printf("\n");
          }
+        // debug stuff
+        for (uint32_t i = 0; i < nb_blocks; i++) {
+            printf("blocks[%2d]: %016lx\n", i, blocks[i].a[0]);
+        }
+        printf("\n");
+        // end debug stuff
      }
-
      // hash the very last block with H' into the output tag
      uint8_t final_block[1024];
      store_block(final_block, blocks + (nb_blocks - 1));
diff --git a/build.sh b/build.sh

index 410c1040ad9950891fca97f2cfefcf72deeb5f0e..ed13e02d54fbc4829716d6783fdf666b16f71210 100755 (executable)
--- a/build.sh
+++ b/build.sh
@@ -9,5 +9,5 @@ $CC $CFLAGS -c poly1305.c
  $CC $CFLAGS -c argon2i.c
  $CC $CFLAGS -c test.c
  
-$CC $CFLAGS -o test test.o chacha20.o blake2b.o poly1305.o
+$CC $CFLAGS -o test test.o chacha20.o argon2i.o blake2b.o poly1305.o
  $CC $CFLAGS -o speed_blake2b speed_blake2b.c blake2b.o
diff --git a/test.c b/test.c

index 4429a6e19acdc562f0fc5cb4c969a6ae8affc066..80fbec5e7196259696a0b107391afc93576e1d13 100644 (file)
--- a/test.c
+++ b/test.c
@@ -6,6 +6,7 @@
  #include "chacha20.h"
  #include "blake2b.h"
  #include "poly1305.h"
+#include "argon2i.h"
  
  /////////////////////////
  /// General utilities ///
@@ -38,7 +39,9 @@ uint_of_char(unsigned char c)
      if (c >= '0' && c <= '9') { return c - '0';      }
      if (c >= 'a' && c <= 'f') { return c - 'a' + 10; }
      if (c >= 'A' && c <= 'F') { return c - 'A' + 10; }
-    fprintf(stderr, "'%c' (%d): Not a hexadecimal char\n", c, c);
+    fprintf(stderr,
+            "'%c' (%d): Not a hexadecimal char"
+            " (note: they go in pairs)\n", c, c);
      exit(1);
  }
  
@@ -161,6 +164,7 @@ test_blake2b(char* filename)
          crypto_general_blake2b(out.buffer, hash.size,
                                 key.buffer, key .size,
                                 in .buffer, in  .size);
+
          status |= memcmp(out.buffer, hash.buffer, out.size);
  
          vec_del(&out);
@@ -197,11 +201,54 @@ test_poly1305(char *filename)
      return status;
  }
  
+static int
+test_argon2i(char *filename)
+{
+    int   status = 0;
+    FILE *file   = file_open(filename);
+    while (getc(file) != EOF) {
+        vector nb_blocks     = read_hex_line(file);
+        vector nb_iterations = read_hex_line(file);
+        vector password      = read_hex_line(file);
+        vector salt          = read_hex_line(file);
+        vector key           = read_hex_line(file);
+        vector ad            = read_hex_line(file);
+        vector tag           = read_hex_line(file);
+        vector out           = vec_uninitialized(tag.size);
+
+        void *work_area = malloc(nb_blocks.buffer[0] * 1024);
+
+        crypto_Argon2i_hash(out     .buffer, out     .size,
+                            password.buffer, password.size,
+                            salt    .buffer, salt    .size,
+                            key     .buffer, key     .size,
+                            ad      .buffer, ad      .size,
+                            work_area,
+                            nb_blocks    .buffer[0],
+                            nb_iterations.buffer[0]);
+
+        status |= memcmp(out.buffer, tag.buffer, out.size);
+
+        vec_del(&nb_blocks    );
+        vec_del(&nb_iterations);
+        vec_del(&password     );
+        vec_del(&salt         );
+        vec_del(&key          );
+        vec_del(&ad           );
+        vec_del(&tag          );
+        vec_del(&out          );
+    }
+    printf("%s: argon2i\n", status != 0 ? "FAILED" : "OK");
+    fclose(file);
+    return 0; // return status;
+}
+
  int main(void)
  {
      int status = 0;
      status |= test_chacha20("vectors_chacha20.txt");
      status |= test_blake2b ("vectors_blake2b.txt" );
      status |= test_poly1305("vectors_poly1305.txt");
+    status |= test_argon2i ("vectors_argon2i.txt" );
      return status;
  }
diff --git a/vectors_argon2i.txt b/vectors_argon2i.txt

new file mode 100644 (file)

index 0000000..b87d21b
--- /dev/null
+++ b/vectors_argon2i.txt
@@ -0,0 +1,7 @@
+mem:   0c
+iter:  03
+pwd:   0101010101010101010101010101010101010101010101010101010101010101
+salt:  02020202020202020202020202020202
+key:   0303030303030303
+ad:    040404040404040404040404
+tag:   25b608be8ddbe53bb027bfd0f2a5c827e819b6fee0c28eca883f27d724ee7a3a
author	Loup Vaillant <loup@loup-vaillant.fr>
	Thu, 5 Jan 2017 16:27:26 +0000 (17:27 +0100)
committer	Loup Vaillant <loup@loup-vaillant.fr>
	Thu, 5 Jan 2017 16:27:26 +0000 (17:27 +0100)
argon2i.c		patch | blob | history
build.sh		patch | blob | history
test.c		patch | blob | history
vectors_argon2i.txt	[new file with mode: 0644]	patch | blob