]> git.codecow.com Git - Monocypher.git/commitdiff
Argon2i: support multiple lanes
authorLoup Vaillant <loup@loup-vaillant.fr>
Wed, 28 Dec 2022 08:08:37 +0000 (09:08 +0100)
committerLoup Vaillant <loup@loup-vaillant.fr>
Wed, 28 Dec 2022 08:18:02 +0000 (09:18 +0100)
src/monocypher.c
src/monocypher.h
tests/gen/argon2i.c
tests/gen/vectors/argon2i
tests/test.c
tests/tis-ci-vectors.h
tests/tis-ci.c

index 7714709381d0b19c7722cecb8fae84d3d541049a..42b47dfe80508bb44a3e261b80b3d587a801c888 100644 (file)
@@ -660,16 +660,6 @@ static void blake_update_32(crypto_blake2b_ctx *ctx, u32 input)
        WIPE_BUFFER(buf);
 }
 
-static void load_block(block *b, const u8 bytes[1024])
-{
-       load64_le_buf(b->a, bytes, 128);
-}
-
-static void store_block(u8 bytes[1024], const block *b)
-{
-       store64_le_buf(bytes, b->a, 128);
-}
-
 static void copy_block(block *o,const block*in){FOR(i,0,128)o->a[i] = in->a[i];}
 static void  xor_block(block *o,const block*in){FOR(i,0,128)o->a[i]^= in->a[i];}
 
@@ -747,18 +737,59 @@ static void g_rounds(block *work_block)
        }
 }
 
+typedef struct {
+       u32 nb_blocks;
+       u32 nb_lanes;
+       u32 nb_passes;
+} argon_hardness;
+
+typedef struct {
+       u32 pass;
+       u32 slice;
+       u32 lane;
+       u32 block;
+} argon_index;
+
+static u32 ref_index(u64 seed, argon_hardness h, argon_index idx)
+{
+       u64 j1 = seed & 0xffffffff; // block selector (inside a lane)
+       u64 j2 = seed >> 32;        // lane selector
+
+       // Blocks may be picked from any of:
+       // - The last 3 slices (if they exist yet)
+       // - The already constructed blocks in this segment (except the last)
+       int first_pass   = idx.pass == 0;
+       u32 lane_size    = h.nb_blocks / h.nb_lanes;
+       u32 segment_size = lane_size / 4;
+       u32 lane         = j2 % h.nb_lanes;
+
+       // Start of the reference set
+       u32 next_slice   = ((idx.slice + 1) % 4) * segment_size;
+       u32 start        = first_pass ? 0 : next_slice;
+
+       // Size of the reference set
+       u32 nb_segments  = first_pass       ? idx.slice     : 3;
+       u32 nb_blocks    = lane == idx.lane ? idx.block - 1 : 0;
+       nb_blocks       -= lane != idx.lane && idx.block == 0; // RFC 9106: 1st block of a segment excludes the last index
+       u32 w_size       = nb_segments * segment_size + nb_blocks;
+
+       // Generate offset from J1 and J2
+       u64 x   = (j1 * j1)    >> 32;
+       u64 y   = (w_size * x) >> 32;
+       u64 z   = (w_size - 1) - y;
+       u64 ref = (start + z) % lane_size;
+       return lane * lane_size + (u32)ref;
+}
+
 // Argon2i uses a kind of stream cipher to determine which reference
 // block it will take to synthesise the next block.  This context hold
 // that stream's state.  (It's very similar to Chacha20.  The block b
 // is analogous to Chacha's own pool)
 typedef struct {
        block b;
-       u32 pass_number;
-       u32 slice_number;
-       u32 nb_blocks;
-       u32 nb_iterations;
+       argon_hardness h;
+       argon_index idx;
        u32 ctr;
-       u32 offset;
 } gidx_ctx;
 
 // The block in the context will determine array indices. To avoid
@@ -767,12 +798,14 @@ typedef struct {
 // easier, but timing attacks are the bigger threat in many settings.
 static void gidx_refresh(gidx_ctx *ctx)
 {
+       ctx->ctr++;
+
        // seed the beginning of the block...
-       ctx->b.a[0] = ctx->pass_number;
-       ctx->b.a[1] = 0;  // lane number (we have only one)
-       ctx->b.a[2] = ctx->slice_number;
-       ctx->b.a[3] = ctx->nb_blocks;
-       ctx->b.a[4] = ctx->nb_iterations;
+       ctx->b.a[0] = ctx->idx.pass;
+       ctx->b.a[1] = ctx->idx.lane;
+       ctx->b.a[2] = ctx->idx.slice;
+       ctx->b.a[3] = ctx->h.nb_blocks;
+       ctx->b.a[4] = ctx->h.nb_passes;
        ctx->b.a[5] = 1;  // type: Argon2i
        ctx->b.a[6] = ctx->ctr;
        ZERO(ctx->b.a + 7, 121); // ...then zero the rest out
@@ -789,87 +822,60 @@ static void gidx_refresh(gidx_ctx *ctx)
        wipe_block(&tmp);
 }
 
-static void gidx_init(gidx_ctx *ctx,
-                      u32 pass_number, u32 slice_number,
-                      u32 nb_blocks,   u32 nb_iterations)
-{
-       ctx->pass_number   = pass_number;
-       ctx->slice_number  = slice_number;
-       ctx->nb_blocks     = nb_blocks;
-       ctx->nb_iterations = nb_iterations;
-       ctx->ctr           = 0;
-
-       // Offset from the beginning of the segment.  For the first slice
-       // of the first pass, we start at the *third* block, so the offset
-       // starts at 2, not 0.
-       if (pass_number != 0 || slice_number != 0) {
-               ctx->offset = 0;
-       } else {
-               ctx->offset = 2;
-               ctx->ctr++;         // Compensates for missed lazy creation
-               gidx_refresh(ctx);  // at the start of gidx_next()
+static void gidx_init(gidx_ctx *ctx, argon_hardness h, argon_index idx)
+{
+       ctx->h   = h;
+       ctx->idx = idx;
+       ctx->ctr = 0;
+
+       // On the first slice of the first pass, 2 blocks are already
+       // filled, and idx.block == 2 instead of zero. In this case the lazy
+       // refresh does not happen, so we need to refresh manually.
+       //
+       // We could instead unconditionally refresh, and use an eager
+       // refresh instead, but this wastes up to one refresh per segment.
+       if (idx.block != 0) {
+               gidx_refresh(ctx);
        }
 }
 
 static u32 gidx_next(gidx_ctx *ctx)
 {
        // lazily creates the offset block we need
-       if ((ctx->offset & 127) == 0) {
-               ctx->ctr++;
+       if ((ctx->idx.block & 127) == 0) {
                gidx_refresh(ctx);
        }
-       u32 index  = ctx->offset & 127; // save index  for current call
-       u32 offset = ctx->offset;       // save offset for current call
-       ctx->offset++;                  // update offset for next call
-
-       // Computes the area size.
-       // Pass 0 : all already finished segments plus already constructed
-       //          blocks in this segment
-       // Pass 1+: 3 last segments plus already constructed
-       //          blocks in this segment.  THE SPEC SUGGESTS OTHERWISE.
-       //          I CONFORM TO THE REFERENCE IMPLEMENTATION.
-       int first_pass  = ctx->pass_number == 0;
-       u32 slice_size  = ctx->nb_blocks >> 2;
-       u32 nb_segments = first_pass ? ctx->slice_number : 3;
-       u32 area_size   = nb_segments * slice_size + offset - 1;
-
-       // Computes the starting position of the reference area.
-       // CONTRARY TO WHAT THE SPEC SUGGESTS, IT STARTS AT THE
-       // NEXT SEGMENT, NOT THE NEXT BLOCK.
-       u32 next_slice = ((ctx->slice_number + 1) & 3) * slice_size;
-       u32 start_pos  = first_pass ? 0 : next_slice;
-
-       // Generate offset from J1 (no need for J2, there's only one lane)
-       u64 j1  = ctx->b.a[index] & 0xffffffff; // pseudo-random number
-       u64 x   = (j1 * j1)       >> 32;
-       u64 y   = (area_size * x) >> 32;
-       u64 z   = (area_size - 1) - y;
-       u64 ref = start_pos + z;                // ref < 2 * nb_blocks
-       return (u32)(ref < ctx->nb_blocks ? ref : ref - ctx->nb_blocks);
+       u32 index = ref_index(ctx->b.a[ctx->idx.block], ctx->h, ctx->idx);
+       ctx->idx.block++;
+       return index;
+
 }
 
 const crypto_argon2_settings crypto_argon2i_defaults = {
                CRYPTO_ARGON2_I, // algorithm
-               100000, 3, 1,    // nb_blocks, nb_iterations, nb_lanes
+               100000, 3, 1,    // nb_blocks, nb_passes, nb_lanes
                16, 32,          // salt_size, hash_size
                0, 0, 0, 0,      // no key, no ad
 };
 
-// Main algorithm
 void crypto_argon2(u8 *hash, void *work_area, const u8 *password,
                    u32 password_size, const u8 *salt, crypto_argon2_settings s)
 {
+       const u32 segment_size = s.nb_blocks / s.nb_lanes / 4;
+       const u32 lane_size    = segment_size * 4;
+       const u32 nb_blocks    = lane_size * s.nb_lanes; // s.nb_blocks rounded down
+
        // work area seen as blocks (must be suitably aligned)
        block *blocks = (block*)work_area;
        {
                crypto_blake2b_ctx ctx;
                crypto_blake2b_init(&ctx);
-               blake_update_32      (&ctx, s.nb_lanes     ); // p: number of "threads"
-               blake_update_32      (&ctx, s.hash_size    );
-               blake_update_32      (&ctx, s.nb_blocks    );
-               blake_update_32      (&ctx, s.nb_iterations);
-               blake_update_32      (&ctx, 0x13           ); // v: version number
-               blake_update_32      (&ctx, s.algorithm    ); // y: Argon2i, Argon2d...
+               blake_update_32      (&ctx, s.nb_lanes ); // p: number of "threads"
+               blake_update_32      (&ctx, s.hash_size);
+               blake_update_32      (&ctx, s.nb_blocks);
+               blake_update_32      (&ctx, s.nb_passes);
+               blake_update_32      (&ctx, 0x13       ); // v: version number
+               blake_update_32      (&ctx, s.algorithm); // y: Argon2i, Argon2d...
                blake_update_32      (&ctx,           password_size);
                crypto_blake2b_update(&ctx, password, password_size);
                blake_update_32      (&ctx,           s.salt_size);
@@ -882,67 +888,87 @@ void crypto_argon2(u8 *hash, void *work_area, const u8 *password,
                u8 initial_hash[72]; // 64 bytes plus 2 words for future hashes
                crypto_blake2b_final(&ctx, initial_hash);
 
-               // fill first 2 blocks
+               // fill first 2 blocks of each lane
                u8 hash_area[1024];
-               store32_le(initial_hash + 64, 0); // first  additional word
-               store32_le(initial_hash + 68, 0); // second additional word
-               extended_hash(hash_area, 1024, initial_hash, 72);
-               load_block(blocks, hash_area);
-
-               store32_le(initial_hash + 64, 1); // slight modification
-               extended_hash(hash_area, 1024, initial_hash, 72);
-               load_block(blocks + 1, hash_area);
+               FOR_T(u32, l, 0, s.nb_lanes) {
+                       FOR_T(u32, i, 0, 2) {
+                               store32_le(initial_hash + 64, i); // first  additional word
+                               store32_le(initial_hash + 68, l); // second additional word
+                               extended_hash(hash_area, 1024, initial_hash, 72);
+                               load64_le_buf(blocks[l * lane_size + i].a, hash_area, 128);
+                       }
+               }
 
                WIPE_BUFFER(initial_hash);
                WIPE_BUFFER(hash_area);
        }
 
-       // Actual number of blocks (must be a multiple of 4 p)
-       u32 nb_blocks = s.nb_blocks - s.nb_blocks % (4 * s.nb_lanes);
-       const u32 segment_size = nb_blocks >> 2;
-
        // fill (then re-fill) the rest of the blocks
        block tmp;
-       gidx_ctx ctx; // public information, no need to wipe
-       FOR_T (u32, pass_number, 0, s.nb_iterations) {
-               int first_pass = pass_number == 0;
-
-               FOR_T (u32, segment, 0, 4) {
-                       gidx_init(&ctx, pass_number, segment, nb_blocks, s.nb_iterations);
-
-                       // On the first segment of the first pass,
-                       // blocks 0 and 1 are already filled.
-                       // We use the offset to skip them.
-                       u32 start_offset  = first_pass && segment == 0 ? 2 : 0;
-                       u32 segment_start = segment * segment_size + start_offset;
-                       u32 segment_end   = (segment + 1) * segment_size;
-                       FOR_T (u32, current_block, segment_start, segment_end) {
-                               block *reference = blocks + gidx_next(&ctx);
-                               block *current   = blocks + current_block;
-                               block *previous  =
-                                       current_block == 0
-                                       ? blocks + nb_blocks - 1
-                                       : blocks + current_block - 1;
-                               // Apply compression function G,
-                               // And copy it (or XOR it) to the current block.
-                               copy_block(&tmp, previous);
-                               xor_block (&tmp, reference);
-                               if (first_pass) { copy_block(current, &tmp); }
-                               else            { xor_block (current, &tmp); }
-                               g_rounds  (&tmp);
-                               xor_block (current, &tmp);
+       FOR_T(u32, pass, 0, s.nb_passes) {
+               FOR_T(u32, slice, 0, 4) {
+                       // Segments within the same slice are independent of
+                       // each other, and can be computed in parallel (one thread
+                       // per lane).  We only need to wait for all segments to be
+                       // finished before starting the next slice.
+                       //
+                       // Monocypher has no support for threads, so segments are
+                       // computed sequentially here.  Note: optimal performance
+                       // (and therefore security) requires one thread per lane.
+                       // Without threads, multi-lane support is only there for
+                       // compatibility, or as a reference.
+                       FOR_T(u32, segment, 0, s.nb_lanes) {
+                               // On the first slice of the first pass,
+                               // blocks 0 and 1 are already filled.
+                               // We use the offset to skip them.
+                               u32    pass_offset   = pass == 0 && slice == 0 ? 2 : 0;
+                               u32    lane_offset   = segment * lane_size;
+                               u32    slice_offset  = slice * segment_size;
+                               block *segment_start = blocks + lane_offset + slice_offset;
+
+                               gidx_ctx ctx; // public information, not wiped
+                               gidx_init(&ctx,
+                                         (argon_hardness){ nb_blocks, s.nb_lanes, s.nb_passes},
+                                         (argon_index)   { pass, slice, segment, pass_offset});
+                               FOR_T (u32, current_block, pass_offset, segment_size) {
+                                       block *reference = blocks + gidx_next(&ctx);
+                                       block *current   = segment_start + current_block;
+                                       block *previous  =
+                                               current_block == 0 && slice_offset == 0
+                                               ? segment_start + lane_size - 1
+                                               : segment_start + current_block - 1;
+
+                                       // Apply compression function G,
+                                       // And copy it (or XOR it) to the current block.
+                                       copy_block(&tmp, previous);
+                                       xor_block (&tmp, reference);
+                                       if (pass == 0) { copy_block(current, &tmp); }
+                                       else           { xor_block (current, &tmp); }
+                                       g_rounds  (&tmp);
+                                       xor_block (current, &tmp);
+                               }
                        }
                }
        }
        wipe_block(&tmp);
+
+       // XOR last blocks of each lane
+       block *last_block = blocks + lane_size - 1;
+       FOR_T (u32, lane, 1, s.nb_lanes) {
+               block *next_block = last_block + lane_size;
+               xor_block(next_block, last_block);
+               last_block = next_block;
+       }
+
+       // Serialize last block
        u8 final_block[1024];
-       store_block(final_block, blocks + (nb_blocks - 1));
+       store64_le_buf(final_block, last_block->a, 128);
 
-       // wipe work area
+       // Wipe work area
        volatile u64 *p = (u64*)work_area;
        ZERO(p, 128 * nb_blocks);
 
-       // hash the very last block with H' into the output hash
+       // Hash the very last block with H' into the output hash
        extended_hash(hash, s.hash_size, final_block, 1024);
        WIPE_BUFFER(final_block);
 }
index 6dacc87e7e088448327a01944f0b64abb7f48a59..08a3029c3f1c5fc013cd597957b01678f63387e9 100644 (file)
@@ -144,13 +144,13 @@ void crypto_blake2b_general_init(crypto_blake2b_ctx *ctx, size_t hash_size,
 // ----------------------------------
 
 typedef struct {
-       uint32_t algorithm;     // Argon2i, Argon2d, Argon2id
-       uint32_t nb_blocks;     // memory hardness, >= 8
-       uint32_t nb_iterations; // CPU hardness, >= 1 (>= 3 recommended for Argon2i)
-       uint32_t nb_lanes;      // parallelism level (single threaded anyway)
-       uint32_t salt_size;     // we recommend 16 bytes
-       uint32_t hash_size;     // we recommend 32 bytes per key
-       const uint8_t *key;     // pointers are aligned to 8 bytes
+       uint32_t algorithm;  // Argon2i, Argon2d, Argon2id
+       uint32_t nb_blocks;  // memory hardness, >= 8
+       uint32_t nb_passes;  // CPU hardness, >= 1 (>= 3 recommended for Argon2i)
+       uint32_t nb_lanes;   // parallelism level (single threaded anyway)
+       uint32_t salt_size;  // we recommend 16 bytes
+       uint32_t hash_size;  // we recommend 32 bytes per key
+       const uint8_t *key;  // pointers are aligned to 8 bytes
        const uint8_t *ad;
        uint32_t key_size;
        uint32_t ad_size;
index d22e7a51687c8c893b29d147eca38241a20bb425..3bbee5f3fc64655fd57ab4173187326e5ffff1d5 100644 (file)
@@ -73,7 +73,9 @@ static void test(size_t nb_blocks, size_t hash_size, size_t nb_iterations)
        print_number(nb_iterations                    );
        print_vector(password, 16                     );
        print_vector(salt    , crypto_pwhash_SALTBYTES);
-       printf(":\n:\n"); // no key, no additionnal data
+       print_number(1);  // one lane (no parallelism)
+       printf(":\n");    // no key
+       printf(":\n");    // no additional data
        print_vector(hash    , hash_size              );
        printf("\n");
 }
index 7b4c51e2b0f22c010e9b45afa24e25aad386c5e1..c96ea2bc02dac1cc10965dd7e9ded59e6551f487 100644 (file)
@@ -2,6 +2,16 @@
 0300000000000000:
 0101010101010101010101010101010101010101010101010101010101010101:
 02020202020202020202020202020202:
+0100000000000000:
 0303030303030303:
 040404040404040404040404:
 afe519be3ab0e92375df221dfb17347080c7000b1be85f9ee39978bf11e7cc3a:
+
+2000000000000000:
+0300000000000000:
+0101010101010101010101010101010101010101010101010101010101010101:
+02020202020202020202020202020202:
+0400000000000000:
+0303030303030303:
+040404040404040404040404:
+c814d9d1dc7f37aa13f0d77f2494bda1c8de6b016dd388d29952a4c4672b6ce8:
index bce5d50d5a746b805f2eb5aa676092334bcdc2cb..f544775be3175bb7524f63bf65543e46e6a70f08 100644 (file)
@@ -522,24 +522,24 @@ static void test_hmac_sha512()
 ///////////////
 static void argon2i(vector_reader *reader)
 {
-       u64    nb_blocks     = load64_le(next_input(reader).buf);
-       u64    nb_iterations = load64_le(next_input(reader).buf);
-       vector password      = next_input(reader);
-       vector salt          = next_input(reader);
-       vector key           = next_input(reader);
-       vector ad            = next_input(reader);
-       vector out           = next_output(reader);
-       void  *work_area     = alloc(nb_blocks * 1024);
-
        crypto_argon2_settings s = crypto_argon2i_defaults;
-       s.nb_blocks     = nb_blocks;
-       s.nb_iterations = nb_iterations;
-       s.hash_size     = out.size;
-       s.salt_size     = salt.size;
-       s.key           = key.buf;
-       s.key_size      = key.size;
-       s.ad            = ad.buf;
-       s.ad_size       = ad.size;
+
+       s.nb_blocks      = load32_le(next_input(reader).buf);
+       s.nb_passes      = load32_le(next_input(reader).buf);
+       vector password  = next_input(reader);
+       vector salt      = next_input(reader);
+       s.nb_lanes       = load32_le(next_input(reader).buf);
+       vector key       = next_input(reader);
+       vector ad        = next_input(reader);
+       vector out       = next_output(reader);
+       void  *work_area = alloc(s.nb_blocks * 1024);
+
+       s.hash_size = out.size;
+       s.salt_size = salt.size;
+       s.key       = key.buf;
+       s.key_size  = key.size;
+       s.ad        = ad.buf;
+       s.ad_size   = ad.size;
 
        crypto_argon2(out.buf, work_area, password.buf, password.size, salt.buf, s);
        free(work_area);
@@ -568,12 +568,12 @@ static void test_argon2i()
 
                // without overlap
                crypto_argon2_settings s = crypto_argon2i_defaults;
-               s.nb_blocks     = 8;
-               s.nb_iterations = 1;
-               s.key           = key;
-               s.ad            = ad;
-               s.key_size      = 32;
-               s.ad_size       = 32;
+               s.nb_blocks = 8;
+               s.nb_passes = 1;
+               s.key       = key;
+               s.ad        = ad;
+               s.key_size  = 32;
+               s.ad_size   = 32;
                crypto_argon2(hash1, clean_work_area, pass, 16, salt, s);
 
                // with overlap
index 3ba12b00991a6a371c8b5de0f9653b1535212f7d..d5ef0694a16d3baa0bf38381459a890406bf6d21 100644 (file)
@@ -349,18 +349,20 @@ static const char *argon2i_vectors[]={
   "0300000000000000",
   "e4e4c4054fe35a75d9c0f679ad8770d8",
   "227e68e4c1e68ce67ee88e6be251a207",
+  "0100000000000000",
   "",
   "",
   "2a2ec585be2ec27c215f677e947c212b1b85de797167d4950e29987977c941117c4c5f6f6f547e62d76b88fa121781986a37ea14dc394917af5396ea58915d",
-  "0800000000000000",
+  "2000000000000000",
   "0300000000000000",
-  "48b3753cff3a6d990163e6b60da1e4e5",
-  "d6a2df78c16c96a52d4fb01ea4ecf70e",
-  "",
-  "",
-  "ec60819d04c1d35416d20abc5908dd972acbfd8f6a282ca2b642064242526683c0f1b237f38bac8279571f049bfed4d8d177ea336f2ec96456eb6c584d3c9607",
+  "0101010101010101010101010101010101010101010101010101010101010101",
+  "02020202020202020202020202020202",
+  "0400000000000000",
+  "0303030303030303",
+  "040404040404040404040404",
+  "c814d9d1dc7f37aa13f0d77f2494bda1c8de6b016dd388d29952a4c4672b6ce8",
 };
-static size_t nb_argon2i_vectors=14;
+static size_t nb_argon2i_vectors=16;
 static const char *edDSA_vectors[]={
   "50831c8cb43cd6822bf3f6fae0801cb6c843d8066b07346635365fb7d6ee54e5",
   "b600ab324d70d2372f3ba5a0d8bdd8b8e797f780b642bd56e69a18db74c389bc",
index 053d6c2d9c03a3e0af2362bc6ae278b8a737c412..1c217470347406fc7b6adfb4158fb80274a6e5c9 100644 (file)
@@ -160,24 +160,24 @@ static void hmac_sha512(vector_reader *reader)
 
 static void argon2i(vector_reader *reader)
 {
-       u64    nb_blocks     = load64_le(next_input(reader).buf);
-       u64    nb_iterations = load64_le(next_input(reader).buf);
-       vector password      = next_input(reader);
-       vector salt          = next_input(reader);
-       vector key           = next_input(reader);
-       vector ad            = next_input(reader);
-       vector out           = next_output(reader);
-       void  *work_area     = alloc(nb_blocks * 1024);
-
        crypto_argon2_settings s = crypto_argon2i_defaults;
-       s.nb_blocks     = nb_blocks;
-       s.nb_iterations = nb_iterations;
-       s.hash_size     = out.size;
-       s.salt_size     = salt.size;
-       s.key           = key.buf;
-       s.key_size      = key.size;
-       s.ad            = ad.buf;
-       s.ad_size       = ad.size;
+
+       s.nb_blocks      = load32_le(next_input(reader).buf);
+       s.nb_passes      = load32_le(next_input(reader).buf);
+       vector password  = next_input(reader);
+       vector salt      = next_input(reader);
+       s.nb_lanes       = load32_le(next_input(reader).buf);
+       vector key       = next_input(reader);
+       vector ad        = next_input(reader);
+       vector out       = next_output(reader);
+       void  *work_area = alloc(s.nb_blocks * 1024);
+
+       s.hash_size = out.size;
+       s.salt_size = salt.size;
+       s.key       = key.buf;
+       s.key_size  = key.size;
+       s.ad        = ad.buf;
+       s.ad_size   = ad.size;
 
        crypto_argon2(out.buf, work_area, password.buf, password.size, salt.buf, s);
        free(work_area);