I believe it's hard to do better than this split into config/inputs/extras:
- One function to rule them all.
- Inputs are all nicely organised.
- There's an easy way to omit the key and additional data.
- Argon2 user code is very clear, though a little verbose.
I believe fusing the "regular" and "extra" inputs together would not be
a good idea, because it would make the common case (no extra inputs)
either more verbose or more confusing than it is right now.
Issue #243
WIPE_BUFFER(buf);
}
+static void blake_update_32_buf(crypto_blake2b_ctx *ctx,
+ const u8 *buf, u32 size)
+{
+ blake_update_32(ctx, size);
+ crypto_blake2b_update(ctx, buf, size);
+}
+
+
static void copy_block(blk *o,const blk*in){FOR(i, 0, 128) o->a[i] = in->a[i];}
static void xor_block(blk *o,const blk*in){FOR(i, 0, 128) o->a[i] ^= in->a[i];}
}
}
-void crypto_argon2(u8 *hash, void *work_area,
- const u8 *password, u32 password_size, const u8 *salt,
- const crypto_argon2_ctx *s)
+const crypto_argon2_extras crypto_argon2_no_extras = { 0, 0, 0, 0 };
+
+void crypto_argon2(u8 *hash, u32 hash_size, void *work_area,
+ crypto_argon2_config config,
+ crypto_argon2_inputs inputs,
+ crypto_argon2_extras extras)
{
- const u32 segment_size = s->nb_blocks / s->nb_lanes / 4;
+ const u32 segment_size = config.nb_blocks / config.nb_lanes / 4;
const u32 lane_size = segment_size * 4;
- const u32 nb_blocks = lane_size * s->nb_lanes; // nb_blocks rounded down
+ const u32 nb_blocks = lane_size * config.nb_lanes; // rounding down
// work area seen as blocks (must be suitably aligned)
blk *blocks = (blk*)work_area;
{
- crypto_blake2b_ctx ctx;
- crypto_blake2b_init(&ctx);
- blake_update_32 (&ctx, s->nb_lanes ); // p: number of "threads"
- blake_update_32 (&ctx, s->hash_size);
- blake_update_32 (&ctx, s->nb_blocks);
- blake_update_32 (&ctx, s->nb_passes);
- blake_update_32 (&ctx, 0x13 ); // v: version number
- blake_update_32 (&ctx, s->algorithm); // y: Argon2i, Argon2d...
- blake_update_32 (&ctx, password_size);
- crypto_blake2b_update(&ctx, password, password_size);
- blake_update_32 (&ctx, s->salt_size);
- crypto_blake2b_update(&ctx, salt, s->salt_size);
- blake_update_32 (&ctx, s->key_size);
- crypto_blake2b_update(&ctx, s->key, s->key_size);
- blake_update_32 (&ctx, s->ad_size);
- crypto_blake2b_update(&ctx, s->ad, s->ad_size);
-
u8 initial_hash[72]; // 64 bytes plus 2 words for future hashes
- crypto_blake2b_final(&ctx, initial_hash);
+ crypto_blake2b_ctx ctx;
+ crypto_blake2b_init (&ctx);
+ blake_update_32 (&ctx, config.nb_lanes ); // p: number of "threads"
+ blake_update_32 (&ctx, hash_size);
+ blake_update_32 (&ctx, config.nb_blocks);
+ blake_update_32 (&ctx, config.nb_passes);
+ blake_update_32 (&ctx, 0x13); // v: version number
+ blake_update_32 (&ctx, config.algorithm); // y: Argon2i, Argon2d...
+ blake_update_32_buf (&ctx, inputs.pass, inputs.pass_size);
+ blake_update_32_buf (&ctx, inputs.salt, inputs.salt_size);
+ blake_update_32_buf (&ctx, extras.key, extras.key_size);
+ blake_update_32_buf (&ctx, extras.ad, extras.ad_size);
+ crypto_blake2b_final(&ctx, initial_hash); // fill 64 first bytes only
// fill first 2 blocks of each lane
u8 hash_area[1024];
- FOR_T(u32, l, 0, s->nb_lanes) {
+ FOR_T(u32, l, 0, config.nb_lanes) {
FOR_T(u32, i, 0, 2) {
store32_le(initial_hash + 64, i); // first additional word
store32_le(initial_hash + 68, l); // second additional word
}
// Argon2i and Argon2id start with constant time indexing
- int constant_time = s->algorithm != CRYPTO_ARGON2_D;
+ int constant_time = config.algorithm != CRYPTO_ARGON2_D;
// Fill (and re-fill) the rest of the blocks
//
// thread per lane. The only reason Monocypher supports multiple
// lanes is compatibility.
blk tmp;
- FOR_T(u32, pass, 0, s->nb_passes) {
+ FOR_T(u32, pass, 0, config.nb_passes) {
FOR_T(u32, slice, 0, 4) {
// On the first slice of the first pass,
// blocks 0 and 1 are already filled, hence pass_offset.
// Argon2id switches back to non-constant time indexing
// after the first two slices of the first pass
- if (slice == 2 && s->algorithm == CRYPTO_ARGON2_ID) {
+ if (slice == 2 && config.algorithm == CRYPTO_ARGON2_ID) {
constant_time = 0;
}
// Each iteration of the following loop may be performed in
- // a separate thread. All iterations must be done before we
- // fill the next slice.
- FOR_T(u32, segment, 0, s->nb_lanes) {
+ // a separate thread. All segments must be fully completed
+ // before we start filling the next slice.
+ FOR_T(u32, segment, 0, config.nb_lanes) {
blk index_block;
u32 index_ctr = 1;
FOR_T (u32, block, pass_offset, segment_size) {
index_block.a[1] = segment;
index_block.a[2] = slice;
index_block.a[3] = nb_blocks;
- index_block.a[4] = s->nb_passes;
- index_block.a[5] = s->algorithm;
+ index_block.a[4] = config.nb_passes;
+ index_block.a[5] = config.algorithm;
index_block.a[6] = index_ctr;
index_ctr++;
u64 y = (window_size * x) >> 32;
u64 z = (window_size - 1) - y;
u64 ref = (window_start + z) % lane_size;
- u32 index = (j2 % s->nb_lanes) * lane_size + (u32)ref;
+ u32 index = (j2%config.nb_lanes)*lane_size + (u32)ref;
blk *reference = blocks + index;
// Shuffle the previous & reference block
// XOR last blocks of each lane
blk *last_block = blocks + lane_size - 1;
- FOR_T (u32, lane, 1, s->nb_lanes) {
+ FOR_T (u32, lane, 1, config.nb_lanes) {
blk *next_block = last_block + lane_size;
xor_block(next_block, last_block);
last_block = next_block;
ZERO(p, 128 * nb_blocks);
// Hash the very last block with H' into the output hash
- extended_hash(hash, s->hash_size, final_block, 1024);
+ extended_hash(hash, hash_size, final_block, 1024);
WIPE_BUFFER(final_block);
}
// Password key derivation (Argon2)
// --------------------------------
+#define CRYPTO_ARGON2_D 0
+#define CRYPTO_ARGON2_I 1
+#define CRYPTO_ARGON2_ID 2
typedef struct {
- uint32_t algorithm; // Argon2i, Argon2d, Argon2id
- uint32_t nb_blocks; // memory hardness, >= 8
+ uint32_t algorithm; // Argon2d, Argon2i, Argon2id
+ uint32_t nb_blocks; // memory hardness, >= 8 * nb_lanes
uint32_t nb_passes; // CPU hardness, >= 1 (>= 3 recommended for Argon2i)
uint32_t nb_lanes; // parallelism level (single threaded anyway)
- uint32_t salt_size; // we recommend 16 bytes
- uint32_t hash_size; // we recommend 32 bytes per key
- const uint8_t *key; // pointers are aligned to 8 bytes
- const uint8_t *ad;
- uint32_t key_size;
- uint32_t ad_size;
-} crypto_argon2_ctx;
+} crypto_argon2_config;
-#define CRYPTO_ARGON2_D 0
-#define CRYPTO_ARGON2_I 1
-#define CRYPTO_ARGON2_ID 2
-
-void crypto_argon2(uint8_t *hash,
- void *work_area,
- const uint8_t *password, uint32_t password_size,
- const uint8_t *salt,
- const crypto_argon2_ctx *s);
+typedef struct {
+ const uint8_t *pass;
+ const uint8_t *salt;
+ uint32_t pass_size;
+ uint32_t salt_size; // 16 bytes recommended
+} crypto_argon2_inputs;
+typedef struct {
+ const uint8_t *key; // may be NULL if no key
+ const uint8_t *ad; // may be NULL if no additional data
+ uint32_t key_size; // 0 if no key (32 bytes recommended otherwise)
+ uint32_t ad_size; // 0 if no additional data
+} crypto_argon2_extras;
+
+extern const crypto_argon2_extras crypto_argon2_no_extras;
+
+void crypto_argon2(uint8_t *hash, uint32_t hash_size, void *work_area,
+ crypto_argon2_config config,
+ crypto_argon2_inputs inputs,
+ crypto_argon2_extras extras);
// Key exchange (X-25519)
// ----------------------
RANDOM_INPUT(pass, 16);
RANDOM_INPUT(salt, 16);
- crypto_argon2_ctx s;
- memset(&s, 0, sizeof(s));
- s.algorithm = CRYPTO_ARGON2_I;
- s.nb_blocks = (u32)(SIZE / 1024);
- s.nb_passes = 3;
- s.nb_lanes = 1;
- s.salt_size = sizeof(salt);
- s.hash_size = sizeof(hash);
+ crypto_argon2_config config;
+ config.algorithm = CRYPTO_ARGON2_I;
+ config.nb_blocks = (u32)(SIZE / 1024);
+ config.nb_passes = 3;
+ config.nb_lanes = 1;
+
+ crypto_argon2_inputs inputs;
+ inputs.pass = pass;
+ inputs.salt = salt;
+ inputs.pass_size = sizeof(pass);
+ inputs.salt_size = sizeof(salt);
TIMING_START {
- crypto_argon2(hash, work_area, pass, sizeof(pass), salt, &s);
+ crypto_argon2(hash, sizeof(hash), work_area,
+ config, inputs, crypto_argon2_no_extras);
}
TIMING_END;
}
//////////////
static void argon2(vector_reader *reader)
{
- crypto_argon2_ctx s;
- s.algorithm = load32_le(next_input(reader).buf);
- s.nb_blocks = load32_le(next_input(reader).buf);
- s.nb_passes = load32_le(next_input(reader).buf);
- s.nb_lanes = load32_le(next_input(reader).buf);
+ crypto_argon2_config config;
+ config.algorithm = load32_le(next_input(reader).buf);
+ config.nb_blocks = load32_le(next_input(reader).buf);
+ config.nb_passes = load32_le(next_input(reader).buf);
+ config.nb_lanes = load32_le(next_input(reader).buf);
+
vector pass = next_input(reader);
vector salt = next_input(reader);
vector key = next_input(reader);
vector ad = next_input(reader);
vector out = next_output(reader);
- void *work_area = alloc(s.nb_blocks * 1024);
+ void *work_area = alloc(config.nb_blocks * 1024);
+
+ crypto_argon2_inputs inputs;
+ inputs.pass = pass.buf;
+ inputs.salt = salt.buf;
+ inputs.pass_size = pass.size;
+ inputs.salt_size = salt.size;
- s.hash_size = out.size;
- s.salt_size = salt.size;
- s.key = key.buf;
- s.key_size = key.size;
- s.ad = ad.buf;
- s.ad_size = ad.size;
+ crypto_argon2_extras extras;
+ extras.key = key.buf;
+ extras.ad = ad.buf;
+ extras.key_size = key.size;
+ extras.ad_size = ad.size;
- crypto_argon2(out.buf, work_area, pass.buf, pass.size, salt.buf, &s);
+ crypto_argon2(out.buf, out.size, work_area, config, inputs, extras);
free(work_area);
}
u8 key [32]; FOR (j, 0, 32) { key [j] = work_area[j + key_offset]; }
u8 ad [32]; FOR (j, 0, 32) { ad [j] = work_area[j + ad_offset]; }
- crypto_argon2_ctx s;
- s.algorithm = CRYPTO_ARGON2_I;
- s.nb_blocks = 8;
- s.nb_passes = 1;
- s.nb_lanes = 1;
- s.salt_size = sizeof(salt);
- s.hash_size = sizeof(hash1);
- s.key_size = sizeof(key);
- s.ad_size = sizeof(ad);
- s.key = key;
- s.ad = ad;
- crypto_argon2(hash1, clean_work_area, pass, 16, salt, &s);
+ crypto_argon2_config config;
+ config.algorithm = CRYPTO_ARGON2_I;
+ config.nb_blocks = 8;
+ config.nb_passes = 1;
+ config.nb_lanes = 1;
+
+ crypto_argon2_inputs inputs;
+ inputs.pass = pass;
+ inputs.salt = salt;
+ inputs.pass_size = sizeof(pass);
+ inputs.salt_size = sizeof(salt);
+
+ crypto_argon2_extras extras;
+ extras.key = key;
+ extras.ad = ad;
+ extras.key_size = sizeof(key);
+ extras.ad_size = sizeof(ad);
+
+ crypto_argon2(hash1, 32, clean_work_area, config, inputs, extras);
// with overlap
- s.key = work_area + key_offset;
- s.ad = work_area + ad_offset;
- crypto_argon2(hash2, work_area,
- work_area + pass_offset, 16,
- work_area + salt_offset, &s);
+ inputs.pass = work_area + pass_offset;
+ inputs.salt = work_area + salt_offset;
+ extras.key = work_area + key_offset;
+ extras.ad = work_area + ad_offset;
+ crypto_argon2(hash2, 32, work_area, config, inputs, extras);
ASSERT_EQUAL(hash1, hash2, 32);
}
static void argon2(vector_reader *reader)
{
- crypto_argon2_ctx s;
- s.algorithm = load32_le(next_input(reader).buf);
- s.nb_blocks = load32_le(next_input(reader).buf);
- s.nb_passes = load32_le(next_input(reader).buf);
- s.nb_lanes = load32_le(next_input(reader).buf);
+ crypto_argon2_config config;
+ config.algorithm = load32_le(next_input(reader).buf);
+ config.nb_blocks = load32_le(next_input(reader).buf);
+ config.nb_passes = load32_le(next_input(reader).buf);
+ config.nb_lanes = load32_le(next_input(reader).buf);
+
vector pass = next_input(reader);
vector salt = next_input(reader);
vector key = next_input(reader);
vector ad = next_input(reader);
vector out = next_output(reader);
- void *work_area = alloc(s.nb_blocks * 1024);
+ void *work_area = alloc(config.nb_blocks * 1024);
+
+ crypto_argon2_inputs inputs;
+ inputs.pass = pass.buf;
+ inputs.salt = salt.buf;
+ inputs.pass_size = pass.size;
+ inputs.salt_size = salt.size;
- s.hash_size = out.size;
- s.salt_size = salt.size;
- s.key = key.buf;
- s.key_size = key.size;
- s.ad = ad.buf;
- s.ad_size = ad.size;
+ crypto_argon2_extras extras;
+ extras.key = key.buf;
+ extras.ad = ad.buf;
+ extras.key_size = key.size;
+ extras.ad_size = ad.size;
- crypto_argon2(out.buf, work_area, pass.buf, pass.size, salt.buf, &s);
+ crypto_argon2(out.buf, out.size, work_area, config, inputs, extras);
free(work_area);
}