From 720f48d2269c41fdcde9231d86bc3bbbf9b921bc Mon Sep 17 00:00:00 2001 From: Loup Vaillant Date: Thu, 5 Jan 2017 20:28:20 +0100 Subject: [PATCH] Argon2i clean up and comments --- argon2i.c | 112 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/argon2i.c b/argon2i.c index 2d52e34..bbaefba 100644 --- a/argon2i.c +++ b/argon2i.c @@ -1,6 +1,10 @@ #include "argon2i.h" #include "blake2b.h" +///////////////// +/// Utilities /// +///////////////// + static uint64_t load64_le(const uint8_t s[8]) { @@ -45,12 +49,24 @@ rotr64(uint64_t x, uint64_t y) return (x >> y) ^ (x << (64 - y)); } -static size_t -min(size_t a, size_t b) +static uint32_t +min(uint32_t a, uint32_t b) { return a <= b ? a : b; } +// updates a blake2 hash with a 32 bit word, little endian. +static void +blake_update_32(crypto_blake2b_ctx *ctx, uint32_t input) +{ + uint8_t buf[4]; + store32_le(buf, input); + crypto_blake2b_update(ctx, buf, 4); +} + +////////////////// +// Argon2 block // +////////////////// typedef struct block { uint64_t a[128]; // 1024 octets in 128 64-bit words } block; @@ -87,14 +103,13 @@ xor_block(block *out, const block *in) } } -static void -blake_update_32(crypto_blake2b_ctx *ctx, uint32_t input) -{ - uint8_t buf[4]; - store32_le(buf, input); - crypto_blake2b_update(ctx, buf, 4); -} +//////////////////// +// Argon2i proper // +//////////////////// +// Hash with a virtually unlimited digest size. +// Doesn't extract more entropy than the base hash function. +// Mainly used for filling a whole kilobyte block with pseudo-random bytes. static void extended_hash(uint8_t *digest, uint32_t digest_size, const uint8_t *input , uint32_t input_size) @@ -126,7 +141,7 @@ extended_hash(uint8_t *digest, uint32_t digest_size, } } -// Computes Z from R in place +// Core of the compression function G. Computes Z from R in place. static void g_rounds(block *work_block) { @@ -167,8 +182,12 @@ g_rounds(block *work_block) } } +// The compression function G +// may overwrite result completely (xcopy == copy_block), +// or XOR result with the old block (xcopy == xor_block) static void -binary_g(block *result, block *x, block *y, void (*xcopy) (block*, const block*)) +binary_g(block *result, const block *x, const block *y, + void (*xcopy) (block*, const block*)) { // put R = X ^ Y into tmp block tmp; @@ -180,17 +199,17 @@ binary_g(block *result, block *x, block *y, void (*xcopy) (block*, const block*) xor_block(result, &tmp); // result = R ^ Z (or R ^ Z ^ old) } -// applies the "two rounds compression function" on the input, in place +// unary version of the compression function. +// The missing argument is implied zero. +// Does the transformation in place. static void -g_square(block *work_block) +unary_g(block *work_block) { // work_block == R block tmp; - for (int i = 0; i < 2; i++) { - copy_block(&tmp, work_block); // tmp = R - g_rounds(work_block); // work_block = Z - xor_block(work_block, &tmp); // work_block = Z ^ R - } + copy_block(&tmp, work_block); // tmp = R + g_rounds(work_block); // work_block = Z + xor_block(work_block, &tmp); // work_block = Z ^ R } typedef struct gidx_ctx { @@ -217,8 +236,11 @@ gidx_refresh(gidx_ctx *ctx) for (int i = 7; i < 128; i++) { ctx->b.a[i] = 0; } - // shuffle the block into something weakly pseudorandom - g_square(&(ctx->b)); + + // Shuffle the block thus: ctx->b = G((G(ctx->b, zero)), zero) + // Applies the G "square" function to get cheap pseudo-random numbers. + unary_g(&(ctx->b)); + unary_g(&(ctx->b)); // square means apply it twice } static void @@ -246,37 +268,38 @@ gidx_next(gidx_ctx *ctx) ctx->ctr++; gidx_refresh(ctx); } - // we don't need J2, because there's only one lane. - uint64_t j1 = ctx->b.a[ctx->index] & 0xffffffff; + // saves and increment the index + uint32_t index = ctx->index; + ctx->index++; // updates index for the next call // Computes the area size. // Pass 0 : all already finished segments plus already constructed // blocks in this segment // Pass 1+: 3 last segments plus already constructed - // blocks in this segment - // THIS IS NOT WHAT THE SPEC SAYS. HERE I COPY THE REFERENCE IMPLEMENTATION - //uint32_t area_size = first_pass ? current_block - 1 : lane_size - 2; - _Bool first_pass = ctx->pass_number == 0; - uint32_t slice_size = ctx->nb_blocks / 4; - uint32_t area_size = ((first_pass ? ctx->slice_number : 3) - * slice_size + ctx->index - 1); - - uint32_t next_slice = (ctx->slice_number == 3 - ? 0 - : (ctx->slice_number + 1) * slice_size); - - // Generates the actual index from J1 - uint64_t x = (j1 * j1) >> 32; - uint64_t y = (area_size * x) >> 32; - uint64_t z = area_size - 1 - y; - uint32_t start_pos = first_pass ? 0 : next_slice; - uint32_t actual_pos = (start_pos + z) % ctx->nb_blocks; - - ctx->index++; // updates index for the next call - - return actual_pos; + // blocks in this segment THE SPEC SUGGESTS OTHERWISE. + // I CONFORM TO THE REFERENCE IMPLEMENTATION. + _Bool first_pass = ctx->pass_number == 0; + uint32_t slice_size = ctx->nb_blocks / 4; + uint32_t area_size = ((first_pass ? ctx->slice_number : 3) + * slice_size + index - 1); + + // Computes the starting position of the reference area. + // CONTRARY TO WHAT THE SPEC SUGGESTS, IT STARTS AT THE + // NEXT SEGMENT, NOT THE NEXT BLOCK. + uint32_t next_slice = (ctx->slice_number == 3 + ? 0 + : (ctx->slice_number + 1) * slice_size); + uint32_t start_pos = first_pass ? 0 : next_slice; + + // Generates the actual index from J1 (no need for J2, there's only one lane) + uint64_t j1 = ctx->b.a[index] & 0xffffffff; // pseudo-random number + uint64_t x = (j1 * j1) >> 32; + uint64_t y = (area_size * x) >> 32; + uint64_t z = area_size - 1 - y; + return (start_pos + z) % ctx->nb_blocks; } +// Main algorithm void crypto_Argon2i_hash(uint8_t *tag, uint32_t tag_size, const uint8_t *password, uint32_t password_size, @@ -289,7 +312,6 @@ crypto_Argon2i_hash(uint8_t *tag, uint32_t tag_size, { // work area seen as blocks (must be suitably aligned) block *blocks = work_area; - { crypto_blake2b_ctx ctx; crypto_blake2b_init(&ctx); -- 2.47.3