From: Loup Vaillant <loup@loup-vaillant.fr>
Date: Mon, 12 Dec 2022 21:21:23 +0000 (+0100)
Subject: Reworked Argon2 API (draft)
X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=8ce2049c9a496f38a00593025851571e52e5e3e9;p=Monocypher.git

Reworked Argon2 API (draft)

This is a prelude to Argon2d and Argon2id support.  The rationale here
is that supporting both with the current API would require way too many
functions.  Using a structure also helps manage the ungodly number of
arguments this function has.

A number of unresolved questions so far:

- Should we pass by value or by reference?
- Should we start the struct with a size field, Microsoft style?
- Should we add a version field?
- Should we keep the nb_lanes field?
- If so, should we support more than one lane, even while staying single
  threaded?
- Should we provide structures with default values to begin with?

This is mostly an API/ABI compatibility question.  Personally I think we
should omit the size field and pass by value: it feels more convenient
in practice.

A version field would let us support future versions of Argon2 without
breaking users, but the specs are so stable nowadays that I'm not sure
this is worth the trouble.  We may add it if users don't need to know
it's there.

The nb_lanes field however might be required for compatibility with the
_current_ specs, so I'm inclined to keep it even if we delay multi-lane
support indefinitely.

Default values are a difficult problem.  The correct strength for
password hashing is highly context dependent: we almost always want to
choose the highest tolerable strength, and there is no one size fits all.
The current manual outlines a _process_ for finding the values that work
for any given situation.

If we don't provide defaults, users have to fill out the fields
themselves, including fields that won't change often (nb_iterations), or
aren't supported yet (nb_lanes if we keep it). If we do provide
defaults, we need to choose them very carefully, and risk quick
obsolescence.

Finally, it's not clear which fields should be in the struct, and which
should be regular arguments.  Right now I put fields that are
likely to stay identical from invocation to invocation in the struct.

Another possibility is to instead restrict ourselves to fields that have
a good default, which would likely demote the nb_blocks to being a
regular argument.  That way users will know what parameters should be
treated as strong recommendations, and which they're supposed to choose
themselves.

Prepares #243
---

diff --git a/src/monocypher.c b/src/monocypher.c
index 2864f1f..7714709 100644
--- a/src/monocypher.c
+++ b/src/monocypher.c
@@ -848,35 +848,36 @@ static u32 gidx_next(gidx_ctx *ctx)
 	return (u32)(ref < ctx->nb_blocks ? ref : ref - ctx->nb_blocks);
 }
 
+const crypto_argon2_settings crypto_argon2i_defaults = {
+		CRYPTO_ARGON2_I, // algorithm
+		100000, 3, 1,    // nb_blocks, nb_iterations, nb_lanes
+		16, 32,          // salt_size, hash_size
+		0, 0, 0, 0,      // no key, no ad
+};
+
 // Main algorithm
-void crypto_argon2i_general(u8       *hash,      u32 hash_size,
-                            void     *work_area, u32 nb_blocks,
-                            u32 nb_iterations,
-                            const u8 *password,  u32 password_size,
-                            const u8 *salt,      u32 salt_size,
-                            const u8 *key,       u32 key_size,
-                            const u8 *ad,        u32 ad_size)
+void crypto_argon2(u8 *hash, void *work_area, const u8 *password,
+                   u32 password_size, const u8 *salt, crypto_argon2_settings s)
 {
 	// work area seen as blocks (must be suitably aligned)
 	block *blocks = (block*)work_area;
 	{
 		crypto_blake2b_ctx ctx;
 		crypto_blake2b_init(&ctx);
-
-		blake_update_32      (&ctx, 1            ); // p: number of threads
-		blake_update_32      (&ctx, hash_size    );
-		blake_update_32      (&ctx, nb_blocks    );
-		blake_update_32      (&ctx, nb_iterations);
-		blake_update_32      (&ctx, 0x13         ); // v: version number
-		blake_update_32      (&ctx, 1            ); // y: Argon2i
+		blake_update_32      (&ctx, s.nb_lanes     ); // p: number of "threads"
+		blake_update_32      (&ctx, s.hash_size    );
+		blake_update_32      (&ctx, s.nb_blocks    );
+		blake_update_32      (&ctx, s.nb_iterations);
+		blake_update_32      (&ctx, 0x13           ); // v: version number
+		blake_update_32      (&ctx, s.algorithm    ); // y: Argon2i, Argon2d...
 		blake_update_32      (&ctx,           password_size);
 		crypto_blake2b_update(&ctx, password, password_size);
-		blake_update_32      (&ctx,           salt_size);
-		crypto_blake2b_update(&ctx, salt,     salt_size);
-		blake_update_32      (&ctx,           key_size);
-		crypto_blake2b_update(&ctx, key,      key_size);
-		blake_update_32      (&ctx,           ad_size);
-		crypto_blake2b_update(&ctx, ad,       ad_size);
+		blake_update_32      (&ctx,           s.salt_size);
+		crypto_blake2b_update(&ctx, salt,     s.salt_size);
+		blake_update_32      (&ctx,           s.key_size);
+		crypto_blake2b_update(&ctx, s.key,    s.key_size);
+		blake_update_32      (&ctx,           s.ad_size);
+		crypto_blake2b_update(&ctx, s.ad,     s.ad_size);
 
 		u8 initial_hash[72]; // 64 bytes plus 2 words for future hashes
 		crypto_blake2b_final(&ctx, initial_hash);
@@ -896,18 +897,18 @@ void crypto_argon2i_general(u8       *hash,      u32 hash_size,
 		WIPE_BUFFER(hash_area);
 	}
 
-	// Actual number of blocks
-	nb_blocks -= nb_blocks & 3; // round down to 4 p (p == 1 thread)
+	// Actual number of blocks (must be a multiple of 4 p)
+	u32 nb_blocks = s.nb_blocks - s.nb_blocks % (4 * s.nb_lanes);
 	const u32 segment_size = nb_blocks >> 2;
 
 	// fill (then re-fill) the rest of the blocks
 	block tmp;
 	gidx_ctx ctx; // public information, no need to wipe
-	FOR_T (u32, pass_number, 0, nb_iterations) {
+	FOR_T (u32, pass_number, 0, s.nb_iterations) {
 		int first_pass = pass_number == 0;
 
 		FOR_T (u32, segment, 0, 4) {
-			gidx_init(&ctx, pass_number, segment, nb_blocks, nb_iterations);
+			gidx_init(&ctx, pass_number, segment, nb_blocks, s.nb_iterations);
 
 			// On the first segment of the first pass,
 			// blocks 0 and 1 are already filled.
@@ -942,19 +943,10 @@ void crypto_argon2i_general(u8       *hash,      u32 hash_size,
 	ZERO(p, 128 * nb_blocks);
 
 	// hash the very last block with H' into the output hash
-	extended_hash(hash, hash_size, final_block, 1024);
+	extended_hash(hash, s.hash_size, final_block, 1024);
 	WIPE_BUFFER(final_block);
 }
 
-void crypto_argon2i(u8   *hash,      u32 hash_size,
-                    void *work_area, u32 nb_blocks, u32 nb_iterations,
-                    const u8 *password,  u32 password_size,
-                    const u8 *salt,      u32 salt_size)
-{
-	crypto_argon2i_general(hash, hash_size, work_area, nb_blocks, nb_iterations,
-	                       password, password_size, salt , salt_size, 0,0,0,0);
-}
-
 ////////////////////////////////////
 /// Arithmetic modulo 2^255 - 19 ///
 ////////////////////////////////////
diff --git a/src/monocypher.h b/src/monocypher.h
index 941fa04..6dacc87 100644
--- a/src/monocypher.h
+++ b/src/monocypher.h
@@ -142,19 +142,29 @@ void crypto_blake2b_general_init(crypto_blake2b_ctx *ctx, size_t hash_size,
 
 // Password key derivation (Argon2 i)
 // ----------------------------------
-void crypto_argon2i(uint8_t       *hash,      uint32_t hash_size,     // >= 4
-                    void          *work_area, uint32_t nb_blocks,     // >= 8
-                    uint32_t       nb_iterations,                     // >= 3
-                    const uint8_t *password,  uint32_t password_size,
-                    const uint8_t *salt,      uint32_t salt_size);    // >= 8
-
-void crypto_argon2i_general(uint8_t       *hash,      uint32_t hash_size,// >= 4
-                            void          *work_area, uint32_t nb_blocks,// >= 8
-                            uint32_t       nb_iterations,                // >= 3
-                            const uint8_t *password,  uint32_t password_size,
-                            const uint8_t *salt,      uint32_t salt_size,// >= 8
-                            const uint8_t *key,       uint32_t key_size,
-                            const uint8_t *ad,        uint32_t ad_size);
+
+typedef struct {
+	uint32_t algorithm;     // Argon2i, Argon2d, Argon2id
+	uint32_t nb_blocks;     // memory hardness, >= 8
+	uint32_t nb_iterations; // CPU hardness, >= 1 (>= 3 recommended for Argon2i)
+	uint32_t nb_lanes;      // parallelism level (single threaded anyway)
+	uint32_t salt_size;     // we recommend 16 bytes
+	uint32_t hash_size;     // we recommend 32 bytes per key
+	const uint8_t *key;     // pointers are aligned to 8 bytes
+	const uint8_t *ad;
+	uint32_t key_size;
+	uint32_t ad_size;
+} crypto_argon2_settings;
+
+#define CRYPTO_ARGON2_I  1
+
+extern const crypto_argon2_settings crypto_argon2i_defaults;
+
+void crypto_argon2(uint8_t       *hash,
+                   void          *work_area,
+                   const uint8_t *password,  uint32_t password_size,
+                   const uint8_t *salt,
+                   crypto_argon2_settings s);
 
 
 // Key exchange (X-25519)
diff --git a/tests/speed/speed.c b/tests/speed/speed.c
index 0b73725..5c10f79 100644
--- a/tests/speed/speed.c
+++ b/tests/speed/speed.c
@@ -120,13 +120,14 @@ static u64 argon2i(void)
 {
 	u64 work_area[SIZE / 8];
 	u8  hash     [32];
-	u32 nb_blocks = (u32)(SIZE / 1024);
 	RANDOM_INPUT(password,  16);
 	RANDOM_INPUT(salt    ,  16);
 
+	crypto_argon2_settings s = crypto_argon2i_defaults;
+	s.nb_blocks = (u32)(SIZE / 1024);
+
 	TIMING_START {
-		crypto_argon2i(hash, 32, work_area, nb_blocks, 3,
-		               password, 16, salt, 16);
+		crypto_argon2(hash, work_area, password, 16, salt, s);
 	}
 	TIMING_END;
 }
diff --git a/tests/test.c b/tests/test.c
index b47038b..53c578c 100644
--- a/tests/test.c
+++ b/tests/test.c
@@ -539,12 +539,18 @@ static void argon2i(vector_reader *reader)
 	vector ad            = next_input(reader);
 	vector out           = next_output(reader);
 	void  *work_area     = alloc(nb_blocks * 1024);
-	crypto_argon2i_general(out.buf, (u32)out.size,
-	                       work_area, (u32)nb_blocks, (u32)nb_iterations,
-	                       password.buf, (u32)password.size,
-	                       salt    .buf, (u32)salt    .size,
-	                       key     .buf, (u32)key     .size,
-	                       ad      .buf, (u32)ad      .size);
+
+	crypto_argon2_settings s = crypto_argon2i_defaults;
+	s.nb_blocks     = nb_blocks;
+	s.nb_iterations = nb_iterations;
+	s.hash_size     = out.size;
+	s.salt_size     = salt.size;
+	s.key           = key.buf;
+	s.key_size      = key.size;
+	s.ad            = ad.buf;
+	s.ad_size       = ad.size;
+
+	crypto_argon2(out.buf, work_area, password.buf, password.size, salt.buf, s);
 	free(work_area);
 }
 
@@ -552,20 +558,6 @@ static void test_argon2i()
 {
 	VECTORS(argon2i);
 
-	printf("\tArgon2i (easy interface)\n");
-	{
-		void *work_area = alloc(8 * 1024);
-		RANDOM_INPUT(password , 32);
-		RANDOM_INPUT(salt     , 16);
-		u8 hash_general[32];
-		u8 hash_easy   [32];
-		crypto_argon2i_general(hash_general, 32, work_area, 8, 1,
-		                       password, 32, salt, 16, 0, 0, 0, 0);
-		crypto_argon2i(hash_easy, 32, work_area, 8, 1, password, 32, salt, 16);
-		ASSERT_EQUAL(hash_general, hash_easy, 32);
-		free(work_area);
-	}
-
 	printf("\tArgon2i (overlapping i/o)\n");
 	u8 *work_area       = (u8*)alloc(8 * 1024);
 	u8 *clean_work_area = (u8*)alloc(8 * 1024);
@@ -583,13 +575,23 @@ static void test_argon2i()
 		u8  key  [32];  FOR (j, 0, 32) { key [j] = work_area[j +  key_offset]; }
 		u8  ad   [32];  FOR (j, 0, 32) { ad  [j] = work_area[j +   ad_offset]; }
 
-		crypto_argon2i_general(hash1, 32, clean_work_area, 8, 1,
-		                       pass, 16, salt, 16, key, 32, ad, 32);
-		crypto_argon2i_general(hash2, 32, work_area, 8, 1,
-		                       work_area + pass_offset, 16,
-		                       work_area + salt_offset, 16,
-		                       work_area +  key_offset, 32,
-		                       work_area +   ad_offset, 32);
+		// without overlap
+		crypto_argon2_settings s = crypto_argon2i_defaults;
+		s.nb_blocks     = 8;
+		s.nb_iterations = 1;
+		s.key           = key;
+		s.ad            = ad;
+		s.key_size      = 32;
+		s.ad_size       = 32;
+		crypto_argon2(hash1, clean_work_area, pass, 16, salt, s);
+
+		// with overlap
+		s.key = work_area + key_offset;
+		s.ad  = work_area +  ad_offset;
+		crypto_argon2(hash2, work_area,
+		              work_area + pass_offset, 16,
+		              work_area + salt_offset, s);
+
 		ASSERT_EQUAL(hash1, hash2, 32);
 	}
 	free(work_area);
diff --git a/tests/tis-ci.c b/tests/tis-ci.c
index 0fb8e29..ce9dc94 100644
--- a/tests/tis-ci.c
+++ b/tests/tis-ci.c
@@ -168,12 +168,18 @@ static void argon2i(vector_reader *reader)
 	vector ad            = next_input(reader);
 	vector out           = next_output(reader);
 	void  *work_area     = alloc(nb_blocks * 1024);
-	crypto_argon2i_general(out.buf, (u32)out.size,
-	                       work_area, (u32)nb_blocks, (u32)nb_iterations,
-	                       password.buf, (u32)password.size,
-	                       salt    .buf, (u32)salt    .size,
-	                       key     .buf, (u32)key     .size,
-	                       ad      .buf, (u32)ad      .size);
+
+	crypto_argon2_settings s = crypto_argon2i_defaults;
+	s.nb_blocks     = nb_blocks;
+	s.nb_iterations = nb_iterations;
+	s.hash_size     = out.size;
+	s.salt_size     = salt.size;
+	s.key           = key.buf;
+	s.key_size      = key.size;
+	s.ad            = ad.buf;
+	s.ad_size       = ad.size;
+
+	crypto_argon2(out.buf, work_area, password.buf, password.size, salt.buf, s);
 	free(work_area);
 }