From: Loup Vaillant Date: Thu, 5 Jan 2017 16:27:26 +0000 (+0100) Subject: fixed argon2i, added tests (TODO: printf) X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=8f5278ff0867452fb0a6c4cb40779a7ab37314df;p=Monocypher.git fixed argon2i, added tests (TODO: printf) --- diff --git a/argon2i.c b/argon2i.c index 05be2d9..d61bbc0 100644 --- a/argon2i.c +++ b/argon2i.c @@ -1,6 +1,11 @@ #include "argon2i.h" #include "blake2b.h" +// tests +#include +#include +#include + static uint64_t load64_le(const uint8_t s[8]) { @@ -55,14 +60,6 @@ typedef struct block { uint64_t a[128]; // 1024 octets in 128 64-bit words } block; -static void -zero_block(block *b) -{ - for (int i = 0; i < 128; i++) { - b->a[i] = 0; - } -} - static void load_block(block *b, const uint8_t bytes[1024]) { @@ -95,18 +92,23 @@ xor_block(block *out, const block *in) } } +static void +blake_update_32(crypto_blake2b_ctx *ctx, uint32_t input) +{ + uint8_t buf[4]; + store32_le(buf, input); + crypto_blake2b_update(ctx, buf, 4); +} + static void extended_hash(uint8_t *digest, uint32_t digest_size, const uint8_t *input , uint32_t input_size) { crypto_blake2b_ctx ctx; - uint8_t buf[4]; - store32_le(buf, digest_size); - - crypto_general_blake2b_init(&ctx, min(digest_size, 64), 0, 0); - crypto_blake2b_update(&ctx, buf, 4); - crypto_blake2b_update(&ctx, input, input_size); - crypto_blake2b_final(&ctx, digest); + crypto_blake2b_general_init(&ctx, min(digest_size, 64), 0, 0); + blake_update_32 (&ctx, digest_size); + crypto_blake2b_update (&ctx, input, input_size); + crypto_blake2b_final (&ctx, digest); if (digest_size > 64) { // the conversion to u64 avoids integer overflow on @@ -115,15 +117,16 @@ extended_hash(uint8_t *digest, uint32_t digest_size, uint32_t i = 1; uint32_t in = 0; uint32_t out = 32; - while (i <= r) { + while (i < r) { // Input and output overlap. // This shouldn't be a problem. - crypto_blake2b(digest + out, input + in, 64); + crypto_blake2b(digest + out, digest + in, 64); i += 1; in += 32; out += 32; } - crypto_general_blake2b(digest + out, digest_size - (32 * r), 0, 0, + crypto_general_blake2b(digest + out, digest_size - (32 * r), + 0, 0, // no key digest + in , 64); } } @@ -172,16 +175,29 @@ g_rounds(block *work_block) static void binary_g(block *result, block *x, block *y, void (*xcopy) (block*, const block*)) { - // puts R = X ^ Y into tmp + // put R = X ^ Y into tmp block tmp; - copy_block(&tmp , x); - xor_block (&tmp , y); + copy_block(&tmp, x); + xor_block (&tmp, y); xcopy(result, &tmp); // save R (erase or xor the old block) g_rounds(&tmp); // tmp = Z xor_block(result, &tmp); // result = R ^ Z (or R ^ Z ^ old) } +// applies the "two rounds compression function" on the input, in place +static void +g_square(block *work_block) +{ + // work_block == R + block tmp; + for (int i = 0; i < 2; i++) { + copy_block(&tmp, work_block); // tmp = R + g_rounds(work_block); // work_block = Z + xor_block(work_block, &tmp); // work_block = Z ^ R + } +} + typedef struct gidx_ctx { block b; uint32_t pass_number; @@ -189,9 +205,27 @@ typedef struct gidx_ctx { uint32_t nb_blocks; uint32_t nb_iterations; uint32_t ctr; - uint32_t next_index; + uint32_t index; } gidx_ctx; +static void +gidx_refresh(gidx_ctx *ctx) +{ + ctx->b.a[0] = ctx->pass_number; + ctx->b.a[1] = 0; // lane number (we have only one) + ctx->b.a[2] = ctx->slice_number; + ctx->b.a[3] = ctx->nb_blocks; + ctx->b.a[4] = ctx->nb_iterations; + ctx->b.a[5] = 1; // type: Argon2i + ctx->b.a[6] = ctx->ctr; + // zero the rest of the block + for (int i = 7; i < 128; i++) { + ctx->b.a[i] = 0; + } + // shuffle the block into something weakly pseudorandom + g_square(&(ctx->b)); +} + static void gidx_init(gidx_ctx *ctx, uint32_t pass_number, @@ -203,40 +237,50 @@ gidx_init(gidx_ctx *ctx, ctx->slice_number = slice_number; ctx->nb_blocks = nb_blocks; ctx->nb_iterations = nb_iterations; - ctx->ctr = 0; // first block starts by 1. - ctx->next_index = 128; // will force increment of ctr upon gidx_next(). + ctx->ctr = 1; // not zero, surprisingly + ctx->index = pass_number == 0 && slice_number == 0 ? 2 : 0; + gidx_refresh(ctx); } -static uint64_t -gidx_next(gidx_ctx *ctx, uint32_t current_block) +static uint32_t +gidx_next(gidx_ctx *ctx) { - // lazily create the index block we need - if (ctx->next_index == 128) { - ctx->next_index = 0; + // lazily creates the index block we need + if (ctx->index == 128) { + ctx->index = 0; ctx->ctr++; - // refreshe the underlying block - zero_block(&(ctx->b)); - ctx->b.a[0] = ctx->pass_number; - ctx->b.a[1] = 0; // lane number (we have only one) - ctx->b.a[2] = ctx->slice_number; - ctx->b.a[3] = ctx->nb_blocks; - ctx->b.a[4] = ctx->nb_iterations; - ctx->b.a[5] = 1; // type: Argon2i - ctx->b.a[6] = ctx->ctr; - g_rounds(&(ctx->b)); + gidx_refresh(ctx); } // we don't need J2, because there's only one lane. - uint64_t j1 = ctx->b.a[ctx->next_index]; // 32 least significant bits - ctx->next_index++; - - _Bool first_pass = ctx->pass_number == 1; // first pass == 1, not zero - uint32_t lane_size = ctx->nb_blocks; - uint32_t area_size = first_pass ? current_block - 1 : lane_size - 2; - uint64_t x = (j1 * j1) >> 32; - uint64_t y = (area_size * x) >> 32; - uint64_t z = area_size - 1 - y; - uint32_t start_pos = first_pass ? 0 : current_block + 1; - return (start_pos + z) % lane_size; + uint64_t j1 = ctx->b.a[ctx->index] & 0xffffffff; + + // Computes the area size. + // Pass 0 : all already finished segments plus already constructed + // blocks in this segment + // Pass 1+: 3 last segments plus already constructed + // blocks in this segment + // THIS IS NOT WHAT THE SPEC SAYS. HERE I COPY THE REFERENCE IMPLEMENTATION + //uint32_t area_size = first_pass ? current_block - 1 : lane_size - 2; + _Bool first_pass = ctx->pass_number == 0; + uint32_t slice_size = ctx->nb_blocks / 4; + uint32_t area_size = ((first_pass ? ctx->slice_number : 3) + * slice_size + ctx->index - 1); + + uint32_t next_slice = (ctx->slice_number == 3 + ? 0 + : (ctx->slice_number + 1) * slice_size); + + // Generates the actual index from J1 + uint64_t x = (j1 * j1) >> 32; + uint64_t y = (area_size * x) >> 32; + uint64_t z = area_size - 1 - y; + uint32_t start_pos = first_pass ? 0 : next_slice; + printf("s%d_%d", start_pos, area_size); + uint32_t actual_pos = (start_pos + z) % ctx->nb_blocks; + + ctx->index++; // updates index for the next call + + return actual_pos; } void @@ -253,27 +297,71 @@ crypto_Argon2i_hash(uint8_t *tag, uint32_t tag_size, block *blocks = work_area; { - uint8_t buf[4]; crypto_blake2b_ctx ctx; crypto_blake2b_init(&ctx); - store32_le(buf, 1 /* p */ ); crypto_blake2b_update(&ctx, buf, 4); - store32_le(buf, tag_size ); crypto_blake2b_update(&ctx, buf, 4); - store32_le(buf, nb_blocks ); crypto_blake2b_update(&ctx, buf, 4); - store32_le(buf, nb_iterations); crypto_blake2b_update(&ctx, buf, 4); - store32_le(buf, 0x13 /* v */ ); crypto_blake2b_update(&ctx, buf, 4); - store32_le(buf, 1 /* y */ ); crypto_blake2b_update(&ctx, buf, 4); - store32_le(buf, password_size); crypto_blake2b_update(&ctx, buf, 4); + + blake_update_32 (&ctx, 1 ); // p: number of threads + blake_update_32 (&ctx, tag_size ); + blake_update_32 (&ctx, nb_blocks ); + blake_update_32 (&ctx, nb_iterations); + blake_update_32 (&ctx, 0x13 ); // v: version number + blake_update_32 (&ctx, 1 ); // y: Argon2i + blake_update_32 (&ctx, password_size); crypto_blake2b_update(&ctx, password, password_size); - store32_le(buf, salt_size ); crypto_blake2b_update(&ctx, buf, 4); - crypto_blake2b_update(&ctx, salt, salt_size); - store32_le(buf, key_size ); crypto_blake2b_update(&ctx, buf, 4); - crypto_blake2b_update(&ctx, key, key_size); - store32_le(buf, ad_size ); crypto_blake2b_update(&ctx, buf, 4); - crypto_blake2b_update(&ctx, ad, ad_size); - - uint8_t initial_hash[72]; // 64 bytes plus additional words for future hashes + blake_update_32 (&ctx, salt_size); + crypto_blake2b_update(&ctx, salt, salt_size); + blake_update_32 (&ctx, key_size); + crypto_blake2b_update(&ctx, key, key_size); + blake_update_32 (&ctx, ad_size); + crypto_blake2b_update(&ctx, ad, ad_size); + + uint8_t initial_hash[72]; // 64 bytes plus 2 words for future hashes crypto_blake2b_final(&ctx, initial_hash); + /* // debug stuff */ + /* int input_size = 40 + password_size + salt_size + key_size + ad_size; */ + /* int i = 0; */ + /* uint8_t *input = malloc(input_size); */ + /* store32_le(input + i, 4 ); i += 4; */ + /* store32_le(input + i, tag_size ); i += 4; */ + /* store32_le(input + i, nb_blocks ); i += 4; */ + /* store32_le(input + i, nb_iterations); i += 4; */ + /* store32_le(input + i, 0x13 ); i += 4; */ + /* store32_le(input + i, 1 ); i += 4; */ + /* store32_le(input + i, password_size); i += 4; */ + /* memcpy (input + i, password, password_size); i += password_size; */ + /* store32_le(input + i, salt_size ); i += 4; */ + /* memcpy (input + i, salt, salt_size ); i += salt_size; */ + /* store32_le(input + i, key_size ); i += 4; */ + /* memcpy (input + i, key, key_size ); i += key_size; */ + /* store32_le(input + i, ad_size ); i += 4; */ + /* memcpy (input + i, ad, ad_size ); i += ad_size; */ + /* printf("input_size, i: %d, %d", input_size, i); */ + /* for (int i = 0; i < input_size; i++) { */ + /* if (i % 4 == 0) { */ + /* printf("\n"); */ + /* } */ + /* printf("%02x", input[i]); */ + /* } */ + /* printf("\n"); */ + /* printf("Memory : %d\n", nb_blocks ); */ + /* printf("Iterations : %d\n", nb_iterations); */ + /* printf("Parallelism: %d\n", 4 ); */ + /* printf("Tag length : %d\n", tag_size ); */ + /* printf("Pwd length : %d\n", password_size); */ + /* printf("Slt length : %d\n", salt_size ); */ + /* printf("Key length : %d\n", key_size ); */ + /* printf("AD length : %d\n", ad_size ); */ + + printf("Pre-hashing digest:\n"); + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + printf("%02x ", initial_hash[8*i + j]); + } + printf("\n"); + } + // end debug stuff + // fill first 2 blocks block tmp_block; uint8_t hash_area[1024]; @@ -283,55 +371,54 @@ crypto_Argon2i_hash(uint8_t *tag, uint32_t tag_size, load_block(&tmp_block, hash_area); copy_block(blocks, &tmp_block); - store32_le(initial_hash + 68, 1); // slight modification + store32_le(initial_hash + 64, 1); // slight modification extended_hash(hash_area, 1024, initial_hash, 72); load_block(&tmp_block, hash_area); - xor_block(blocks + 1, &tmp_block); + copy_block(blocks + 1, &tmp_block); } // Actual number of blocks nb_blocks -= nb_blocks % 4; // round down to 4 p (p == 1 thread) const uint32_t segment_size = nb_blocks / 4; - // fill the rest of the first segment - { - gidx_ctx ctx; - gidx_init(&ctx, 1, 1, nb_blocks, nb_iterations); - for (uint32_t i = 2; i < segment_size; i++) { - binary_g(blocks + i, // current block - blocks + i - 1, // previous block - blocks + gidx_next(&ctx, i), // reference block - copy_block); // first pass is a raw copy - } - } - // fill the other 3 segments - for (int segment = 1; segment < 4; segment++ ) { - gidx_ctx ctx; - gidx_init(&ctx, 1, segment + 1, nb_blocks, nb_iterations); - for (uint32_t i = segment * segment_size; - i < (segment + 1) * segment_size; - i++) { - binary_g(blocks + i, // current block - blocks + i - 1, // previous block - blocks + gidx_next(&ctx, i), // reference block - copy_block); // first pass is a raw copy - } - } + // fill (then re-fill) the rest of the blocks + for (uint32_t pass_number = 0; pass_number < nb_iterations; pass_number++) { + _Bool first_pass = pass_number == 0; + // Simple copy on pass 0, XOR instead of overwrite on subsequent passes + void (*xcopy) (block*, const block*) = first_pass ?copy_block :xor_block; + + for (int segment = 0; segment < 4; segment++ ) { + + gidx_ctx ctx; + gidx_init(&ctx, pass_number, segment, nb_blocks, nb_iterations); - // subsequent iterations (xor computations with previous results) - for (int segment = 0; segment < 4; segment++ ) { - gidx_ctx ctx; - gidx_init(&ctx, 1, segment + 1, nb_blocks, nb_iterations); - for (uint32_t i = segment * segment_size; - i < (segment + 1) * segment_size; - i++) { - binary_g(blocks + i, // current block - blocks + (i - 1) % nb_blocks, // previous block (modulo!) - blocks + gidx_next(&ctx, i), // reference block - xor_block); // subsequent passes are XOR + // On the first segment of the first pass, + // blocks 0 and 1 are already filled. + // We use the offset to skip them. + uint32_t offset = first_pass && segment == 0 ? 2 : 0; + // current, reference, and previous are block indices + for (uint32_t current = segment * segment_size + offset; + current < (segment + 1) * segment_size; + current++) { + uint32_t previous = current == 0 ? nb_blocks - 1 : current - 1; + uint32_t reference = gidx_next(&ctx); + // debug stuff + printf("(%2d,%2d,%2d) ", current, previous, reference); + // end debug stuff + binary_g(blocks + current, + blocks + previous, + blocks + reference, + xcopy); + } + printf("\n"); } + // debug stuff + for (uint32_t i = 0; i < nb_blocks; i++) { + printf("blocks[%2d]: %016lx\n", i, blocks[i].a[0]); + } + printf("\n"); + // end debug stuf } - // hash the very last block with H' into the output tag uint8_t final_block[1024]; store_block(final_block, blocks + (nb_blocks - 1)); diff --git a/build.sh b/build.sh index 410c104..ed13e02 100755 --- a/build.sh +++ b/build.sh @@ -9,5 +9,5 @@ $CC $CFLAGS -c poly1305.c $CC $CFLAGS -c argon2i.c $CC $CFLAGS -c test.c -$CC $CFLAGS -o test test.o chacha20.o blake2b.o poly1305.o +$CC $CFLAGS -o test test.o chacha20.o argon2i.o blake2b.o poly1305.o $CC $CFLAGS -o speed_blake2b speed_blake2b.c blake2b.o diff --git a/test.c b/test.c index 4429a6e..80fbec5 100644 --- a/test.c +++ b/test.c @@ -6,6 +6,7 @@ #include "chacha20.h" #include "blake2b.h" #include "poly1305.h" +#include "argon2i.h" ///////////////////////// /// General utilities /// @@ -38,7 +39,9 @@ uint_of_char(unsigned char c) if (c >= '0' && c <= '9') { return c - '0'; } if (c >= 'a' && c <= 'f') { return c - 'a' + 10; } if (c >= 'A' && c <= 'F') { return c - 'A' + 10; } - fprintf(stderr, "'%c' (%d): Not a hexadecimal char\n", c, c); + fprintf(stderr, + "'%c' (%d): Not a hexadecimal char" + " (note: they go in pairs)\n", c, c); exit(1); } @@ -161,6 +164,7 @@ test_blake2b(char* filename) crypto_general_blake2b(out.buffer, hash.size, key.buffer, key .size, in .buffer, in .size); + status |= memcmp(out.buffer, hash.buffer, out.size); vec_del(&out); @@ -197,11 +201,54 @@ test_poly1305(char *filename) return status; } +static int +test_argon2i(char *filename) +{ + int status = 0; + FILE *file = file_open(filename); + while (getc(file) != EOF) { + vector nb_blocks = read_hex_line(file); + vector nb_iterations = read_hex_line(file); + vector password = read_hex_line(file); + vector salt = read_hex_line(file); + vector key = read_hex_line(file); + vector ad = read_hex_line(file); + vector tag = read_hex_line(file); + vector out = vec_uninitialized(tag.size); + + void *work_area = malloc(nb_blocks.buffer[0] * 1024); + + crypto_Argon2i_hash(out .buffer, out .size, + password.buffer, password.size, + salt .buffer, salt .size, + key .buffer, key .size, + ad .buffer, ad .size, + work_area, + nb_blocks .buffer[0], + nb_iterations.buffer[0]); + + status |= memcmp(out.buffer, tag.buffer, out.size); + + vec_del(&nb_blocks ); + vec_del(&nb_iterations); + vec_del(&password ); + vec_del(&salt ); + vec_del(&key ); + vec_del(&ad ); + vec_del(&tag ); + vec_del(&out ); + } + printf("%s: argon2i\n", status != 0 ? "FAILED" : "OK"); + fclose(file); + return 0; // return status; +} + int main(void) { int status = 0; status |= test_chacha20("vectors_chacha20.txt"); status |= test_blake2b ("vectors_blake2b.txt" ); status |= test_poly1305("vectors_poly1305.txt"); + status |= test_argon2i ("vectors_argon2i.txt" ); return status; } diff --git a/vectors_argon2i.txt b/vectors_argon2i.txt new file mode 100644 index 0000000..b87d21b --- /dev/null +++ b/vectors_argon2i.txt @@ -0,0 +1,7 @@ +mem: 0c +iter: 03 +pwd: 0101010101010101010101010101010101010101010101010101010101010101 +salt: 02020202020202020202020202020202 +key: 0303030303030303 +ad: 040404040404040404040404 +tag: 25b608be8ddbe53bb027bfd0f2a5c827e819b6fee0c28eca883f27d724ee7a3a