#define HASH_FINAL COMBINE2(HASH, _final)
#define FOR(i, start, end) for (size_t (i) = (start); (i) < (end); (i)++)
-typedef uint8_t u8;
+typedef uint8_t u8;
typedef uint32_t u32;
-typedef int32_t i32;
-typedef int64_t i64;
+typedef int32_t i32;
+typedef int64_t i64;
typedef uint64_t u64;
+static u32 load24_le(const u8 s[3])
+{
+ return (u32)s[0]
+ | ((u32)s[1] << 8)
+ | ((u32)s[2] << 16);
+}
+
static u32 load32_le(const u8 s[4])
{
return (u32)s[0]
| ((u64)s[7] << 56);
}
-static void store32_le(u8 output[4], u32 input)
+static void store32_le(u8 out[4], u32 in)
{
- output[0] = input & 0xff;
- output[1] = (input >> 8) & 0xff;
- output[2] = (input >> 16) & 0xff;
- output[3] = (input >> 24) & 0xff;
+ out[0] = in & 0xff;
+ out[1] = (in >> 8) & 0xff;
+ out[2] = (in >> 16) & 0xff;
+ out[3] = (in >> 24) & 0xff;
}
-static void store64_le(u8 output[8], u64 input)
+static void store64_le(u8 out[8], u64 in)
{
- output[0] = input & 0xff;
- output[1] = (input >> 8) & 0xff;
- output[2] = (input >> 16) & 0xff;
- output[3] = (input >> 24) & 0xff;
- output[4] = (input >> 32) & 0xff;
- output[5] = (input >> 40) & 0xff;
- output[6] = (input >> 48) & 0xff;
- output[7] = (input >> 56) & 0xff;
+ out[0] = in & 0xff;
+ out[1] = (in >> 8) & 0xff;
+ out[2] = (in >> 16) & 0xff;
+ out[3] = (in >> 24) & 0xff;
+ out[4] = (in >> 32) & 0xff;
+ out[5] = (in >> 40) & 0xff;
+ out[6] = (in >> 48) & 0xff;
+ out[7] = (in >> 56) & 0xff;
}
static u64 rotr64(u64 x, u64 n) { return (x >> n) ^ (x << (64 - n)); }
int crypto_memcmp(const u8 *p1, const u8 *p2, size_t n)
{
unsigned diff = 0;
- FOR (i, 0, n) { diff |= (p1[i] ^ p2[i]); }
+ FOR (i, 0, n) {
+ diff |= (p1[i] ^ p2[i]);
+ }
return (1 & ((diff - 1) >> 8)) - 1;
}
int crypto_zerocmp(const u8 *p, size_t n)
{
unsigned diff = 0;
- FOR (i, 0, n) { diff |= p[i]; }
+ FOR (i, 0, n) {
+ diff |= p[i];
+ }
return (1 & ((diff - 1) >> 8)) - 1;
}
}
ctx->pool_idx = 0;
ctx->input[12]++;
- if (ctx->input[12] == 0) { ctx->input[13]++; }
+ if (ctx->input[12] == 0) {
+ ctx->input[13]++;
+ }
}
}
const u8 key[32],
const u8 nonce[8])
{
- chacha20_init_key(ctx, key ); // key
- crypto_chacha20_set_ctr(ctx, 0); // counter
+ chacha20_init_key (ctx, key); // key
+ crypto_chacha20_set_ctr(ctx, 0 ); // counter
ctx->input[14] = load32_le(nonce + 0); // nonce
ctx->input[15] = load32_le(nonce + 4); // nonce
}
void crypto_chacha20_encrypt(crypto_chacha_ctx *ctx,
u8 *cipher_text,
const u8 *plain_text,
- size_t message_size)
+ size_t text_size)
{
// Align ourselves with 4 byte words
- while (ctx->pool_idx % 4 != 0 && message_size > 0) {
+ while (ctx->pool_idx % 4 != 0 && text_size > 0) {
u8 stream = chacha20_pool_byte(ctx);
- u8 plain = plain_text ? *(plain_text++) : 0;
- *(cipher_text++) = stream ^ plain;
- message_size--;
+ u8 plain = 0;
+ if (plain_text != 0) {
+ plain = *plain_text;
+ plain_text++;
+ }
+ *cipher_text = stream ^ plain;
+ text_size--;
+ cipher_text++;
}
// Main processing by 4 byte chunks
- size_t nb_words = message_size / 4;
- size_t remainder = message_size % 4;
+ size_t nb_words = text_size / 4;
+ size_t remainder = text_size % 4;
FOR (i, 0, nb_words) {
chacha20_refill_pool(ctx);
u32 txt = 0;
FOR (i, 0, remainder) {
chacha20_refill_pool(ctx);
u8 stream = chacha20_pool_byte(ctx);
- u8 plain = plain_text ? *(plain_text++) : 0;
- *(cipher_text++) = stream ^ plain;
+ u8 plain = 0;
+ if (plain_text != 0) {
+ plain = *plain_text;
+ plain_text++;
+ }
+ *cipher_text = stream ^ plain;
+ cipher_text++;
}
}
const u32 r2 = ctx->r[2]; // r2 <= 0ffffffc
const u32 r3 = ctx->r[3]; // r3 <= 0ffffffc
const u32 rr0 = (r0 >> 2) * 5; // rr0 <= 13fffffb // lose 2 bits...
- const u32 rr1 = (r1 >> 2) + r1; // rr1 <= 13fffffb // * 5 trick
- const u32 rr2 = (r2 >> 2) + r2; // rr2 <= 13fffffb // * 5 trick
- const u32 rr3 = (r3 >> 2) + r3; // rr3 <= 13fffffb // * 5 trick
+ const u32 rr1 = (r1 >> 2) + r1; // rr1 <= 13fffffb // rr1 == (r1 >> 2) * 5
+    const u32 rr2 = (r2 >> 2) + r2; // rr2 <= 13fffffb // rr2 == (r2 >> 2) * 5
+    const u32 rr3 = (r3 >> 2) + r3; // rr3 <= 13fffffb // rr3 == (r3 >> 2) * 5
// (h + c) * r, without carry propagation
const u64 x0 = s0*r0 + s1*rr3 + s2*rr2 + s3*rr1 + s4*rr0;//<=97ffffe007fffff8
// (re-)initializes the input counter and input buffer
static void poly_clear_c(crypto_poly1305_ctx *ctx)
{
- FOR (i, 0, 4) { ctx->c[i] = 0; }
+ FOR (i, 0, 4) {
+ ctx->c[i] = 0;
+ }
ctx->c_idx = 0;
}
}
}
-static void poly_set_input(crypto_poly1305_ctx *ctx, u8 input)
+static void poly_take_input(crypto_poly1305_ctx *ctx, u8 input)
{
- ctx->c[ctx->c_idx / 4] |= (u32)input << ((ctx->c_idx % 4) * 8);
+ size_t word = ctx->c_idx / 4;
+ size_t byte = ctx->c_idx % 4;
+ ctx->c[word] |= (u32)input << (byte * 8);
ctx->c_idx++;
}
void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const u8 key[32])
{
- // constant init
- FOR (i, 0, 5) { ctx->h [i] = 0; } // initial hash: zero
- ctx->c [4] = 1; // add 2^130 to every input block
- ctx->pad[4] = 0; // poly_add() compatibility
+ // Initial hash is zero
+ FOR (i, 0, 5) {
+ ctx->h [i] = 0;
+ }
+ // add 2^130 to every input block
+ ctx->c [4] = 1;
poly_clear_c(ctx);
// load r and pad (r has some of its bits cleared)
- /**/ ctx->r [0] = load32_le(key ) & 0x0fffffff;
- FOR (i, 1, 4) { ctx->r [i] = load32_le(key + i*4) & 0x0ffffffc; }
- FOR (i, 0, 4) { ctx->pad[i] = load32_le(key + i*4 + 16); }
+ FOR (i, 0, 1) { ctx->r [0] = load32_le(key ) & 0x0fffffff; }
+ FOR (i, 1, 4) { ctx->r [i] = load32_le(key + i*4 ) & 0x0ffffffc; }
+ FOR (i, 0, 4) { ctx->pad[i] = load32_le(key + i*4 + 16); }
}
void crypto_poly1305_update(crypto_poly1305_ctx *ctx,
- const u8 *msg, size_t msg_size)
+ const u8 *message, size_t message_size)
{
// Align ourselves with 4 byte words
- while (ctx->c_idx % 4 != 0 && msg_size > 0) {
- poly_set_input(ctx, *msg);
- msg++;
- msg_size--;
+ while (ctx->c_idx % 4 != 0 && message_size > 0) {
+ poly_take_input(ctx, *message);
+ message++;
+ message_size--;
}
// Process the input 4 bytes at a time
- size_t nb_words = msg_size / 4;
- size_t remainder = msg_size % 4;
+ size_t nb_words = message_size / 4;
+ size_t remainder = message_size % 4;
FOR (i, 0, nb_words) {
poly_end_block(ctx);
- ctx->c[ctx->c_idx / 4] = load32_le(msg);
- msg += 4;
+ ctx->c[ctx->c_idx / 4] = load32_le(message);
+ message += 4;
ctx->c_idx += 4;
}
poly_end_block(ctx);
}
FOR (i, 0, remainder) {
- poly_set_input(ctx, msg[i]);
+ poly_take_input(ctx, message[i]);
}
}
// move the final 1 according to remaining input length
// (We may add less than 2^130 to the last input block)
ctx->c[4] = 0;
- poly_set_input(ctx, 1);
+ poly_take_input(ctx, 1);
// one last hash update
poly_block(ctx);
}
u += (i64)(ctx->h[3]) + ctx->pad[3]; store32_le(mac + 12, u);
}
-void crypto_poly1305_auth(u8 mac[16], const u8 *msg,
- size_t msg_size, const u8 key[32])
+void crypto_poly1305_auth(u8 mac[16], const u8 *message,
+ size_t message_size, const u8 key[32])
{
crypto_poly1305_ctx ctx;
crypto_poly1305_init (&ctx, key);
- crypto_poly1305_update(&ctx, msg, msg_size);
+ crypto_poly1305_update(&ctx, message, message_size);
crypto_poly1305_final (&ctx, mac);
}
};
// increment the input offset
-static void incr(crypto_blake2b_ctx *ctx)
+static void blake2b_incr(crypto_blake2b_ctx *ctx)
{
u64 *x = ctx->input_offset;
- size_t y = ctx->buf_idx;
- x[0] += y; // increment low word
- if (x[0] < y) { x[1]++; } // carry overflow to high word
+ size_t y = ctx->input_idx;
+ x[0] += y;
+ if (x[0] < y) {
+ x[1]++;
+ }
}
static void blake2b_set_input(crypto_blake2b_ctx *ctx, u8 input)
{
- ctx->buf[ctx->buf_idx / 8] |= (u64)input << (8 * (ctx->buf_idx % 8));
- ctx->buf_idx++;
+ size_t word = ctx->input_idx / 8;
+ size_t byte = ctx->input_idx % 8;
+ ctx->input[word] |= (u64)input << (byte * 8);
+ ctx->input_idx++;
}
-static void compress(crypto_blake2b_ctx *ctx, int is_last_block)
+static void blake2b_compress(crypto_blake2b_ctx *ctx, int is_last_block)
{
static const u8 sigma[12][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
}
v[12] ^= ctx->input_offset[0];
v[13] ^= ctx->input_offset[1];
- if (is_last_block) { v[14] = ~v[14]; }
+ if (is_last_block) {
+ v[14] = ~v[14];
+ }
// mangle work vector
+ uint64_t *input = ctx->input;
FOR (i, 0, 12) {
#define BLAKE2_G(v, a, b, c, d, x, y) \
v[a] += v[b] + x; v[d] = rotr64(v[d] ^ v[a], 32); \
v[a] += v[b] + y; v[d] = rotr64(v[d] ^ v[a], 16); \
v[c] += v[d]; v[b] = rotr64(v[b] ^ v[c], 63); \
- BLAKE2_G(v, 0,4, 8,12, ctx->buf[sigma[i][ 0]], ctx->buf[sigma[i][ 1]]);
- BLAKE2_G(v, 1,5, 9,13, ctx->buf[sigma[i][ 2]], ctx->buf[sigma[i][ 3]]);
- BLAKE2_G(v, 2,6,10,14, ctx->buf[sigma[i][ 4]], ctx->buf[sigma[i][ 5]]);
- BLAKE2_G(v, 3,7,11,15, ctx->buf[sigma[i][ 6]], ctx->buf[sigma[i][ 7]]);
- BLAKE2_G(v, 0,5,10,15, ctx->buf[sigma[i][ 8]], ctx->buf[sigma[i][ 9]]);
- BLAKE2_G(v, 1,6,11,12, ctx->buf[sigma[i][10]], ctx->buf[sigma[i][11]]);
- BLAKE2_G(v, 2,7, 8,13, ctx->buf[sigma[i][12]], ctx->buf[sigma[i][13]]);
- BLAKE2_G(v, 3,4, 9,14, ctx->buf[sigma[i][14]], ctx->buf[sigma[i][15]]);
+ BLAKE2_G(v, 0, 4, 8, 12, input[sigma[i][ 0]], input[sigma[i][ 1]]);
+ BLAKE2_G(v, 1, 5, 9, 13, input[sigma[i][ 2]], input[sigma[i][ 3]]);
+ BLAKE2_G(v, 2, 6, 10, 14, input[sigma[i][ 4]], input[sigma[i][ 5]]);
+ BLAKE2_G(v, 3, 7, 11, 15, input[sigma[i][ 6]], input[sigma[i][ 7]]);
+ BLAKE2_G(v, 0, 5, 10, 15, input[sigma[i][ 8]], input[sigma[i][ 9]]);
+ BLAKE2_G(v, 1, 6, 11, 12, input[sigma[i][10]], input[sigma[i][11]]);
+ BLAKE2_G(v, 2, 7, 8, 13, input[sigma[i][12]], input[sigma[i][13]]);
+ BLAKE2_G(v, 3, 4, 9, 14, input[sigma[i][14]], input[sigma[i][15]]);
}
// update hash
- FOR (i, 0, 8) { ctx->hash[i] ^= v[i] ^ v[i+8]; }
+ FOR (i, 0, 8) {
+ ctx->hash[i] ^= v[i] ^ v[i+8];
+ }
}
static void blake2b_reset_input(crypto_blake2b_ctx *ctx)
{
- FOR(i, 0, 16) { ctx->buf[i] = 0; }
- ctx->buf_idx = 0;
+ FOR(i, 0, 16) {
+ ctx->input[i] = 0;
+ }
+ ctx->input_idx = 0;
}
static void blake2b_end_block(crypto_blake2b_ctx *ctx)
{
- if (ctx->buf_idx == 128) { // If buffer is full,
- incr(ctx); // update the input offset
- compress(ctx, 0); // compress the (not last) block
+ if (ctx->input_idx == 128) { // If buffer is full,
+ blake2b_incr(ctx); // update the input offset
+ blake2b_compress(ctx, 0); // and compress the (not last) block
blake2b_reset_input(ctx);
}
}
-void crypto_blake2b_general_init(crypto_blake2b_ctx *ctx, size_t out_size,
+void crypto_blake2b_general_init(crypto_blake2b_ctx *ctx, size_t hash_size,
const u8 *key, size_t key_size)
{
// initial hash
- FOR (i, 0, 8) { ctx->hash[i] = iv[i]; }
- ctx->hash[0] ^= 0x01010000 ^ (key_size << 8) ^ out_size;
+ FOR (i, 0, 8) {
+ ctx->hash[i] = iv[i];
+ }
+ ctx->hash[0] ^= 0x01010000 ^ (key_size << 8) ^ hash_size;
ctx->input_offset[0] = 0; // begining of the input, no offset
ctx->input_offset[1] = 0; // begining of the input, no offset
- ctx->buf_idx = 0; // buffer is empty
- ctx->hash_size = out_size; // remember the hash size we want
+ ctx->input_idx = 0; // buffer is empty
+ ctx->hash_size = hash_size; // remember the hash size we want
blake2b_reset_input(ctx); // clear the input buffer
// if there is a key, the first block is that key
if (key_size > 0) {
crypto_blake2b_update(ctx, key, key_size);
- ctx->buf_idx = 128;
+ ctx->input_idx = 128;
}
}
crypto_blake2b_general_init(ctx, 64, 0, 0);
}
-void crypto_blake2b_update(crypto_blake2b_ctx *ctx, const u8 *in, size_t in_size)
+void crypto_blake2b_update(crypto_blake2b_ctx *ctx,
+ const u8 *message, size_t message_size)
{
// Align ourselves with 8 byte words
- while (ctx->buf_idx % 8 != 0 && in_size > 0) {
- blake2b_set_input(ctx, *in);
- in++;
- in_size--;
+ while (ctx->input_idx % 8 != 0 && message_size > 0) {
+ blake2b_set_input(ctx, *message);
+ message++;
+ message_size--;
}
// Process the input 8 bytes at a time
- size_t nb_words = in_size / 8;
- size_t remainder = in_size % 8;
+ size_t nb_words = message_size / 8;
+ size_t remainder = message_size % 8;
FOR (i, 0, nb_words) {
blake2b_end_block(ctx);
- ctx->buf[ctx->buf_idx / 8] = load64_le(in);
- in += 8;
- ctx->buf_idx += 8;
+ ctx->input[ctx->input_idx / 8] = load64_le(message);
+ message += 8;
+ ctx->input_idx += 8;
}
// Load the remainder
blake2b_end_block(ctx);
}
FOR (i, 0, remainder) {
- blake2b_set_input(ctx, in[i]);
+ blake2b_set_input(ctx, message[i]);
}
}
-void crypto_blake2b_final(crypto_blake2b_ctx *ctx, u8 *out)
+void crypto_blake2b_final(crypto_blake2b_ctx *ctx, u8 *hash)
{
- incr(ctx); // update the input offset (the last block may not be full)
- compress(ctx, 1); // compress the last block
- FOR (i, 0, ctx->hash_size) {
- out[i] = (ctx->hash[i / 8] >> (8 * (i & 7))) & 0xff;
+ blake2b_incr(ctx); // update the input offset
+ blake2b_compress(ctx, 1); // compress the last block
+ size_t nb_words = ctx->hash_size / 8;
+ FOR (i, 0, nb_words) {
+ store64_le(hash + i*8, ctx->hash[i]);
+ }
+ FOR (i, nb_words * 8, ctx->hash_size) {
+ hash[i] = (ctx->hash[i / 8] >> (8 * (i % 8))) & 0xff;
}
}
-void crypto_blake2b_general(u8 *out, size_t out_size,
- const u8 *key, size_t key_size,
- const u8 *in, size_t in_size)
+void crypto_blake2b_general(u8 *hash , size_t hash_size,
+ const u8 *key , size_t key_size,
+ const u8 *message, size_t message_size)
{
crypto_blake2b_ctx ctx;
- crypto_blake2b_general_init(&ctx, out_size, key, key_size);
- crypto_blake2b_update(&ctx, in, in_size);
- crypto_blake2b_final(&ctx, out);
+ crypto_blake2b_general_init(&ctx, hash_size, key, key_size);
+ crypto_blake2b_update(&ctx, message, message_size);
+ crypto_blake2b_final(&ctx, hash);
}
-void crypto_blake2b(u8 out[64], const u8 *in, size_t in_size)
+void crypto_blake2b(u8 hash[64], const u8 *message, size_t message_size)
{
- crypto_blake2b_general(out, 64, 0, 0, in, in_size);
+ crypto_blake2b_general(hash, 64, 0, 0, message, message_size);
}
////////////////
// references to R, Z, Q etc. come from the spec
-typedef struct { u64 a[128]; } block; // 1024 octets
+// Argon2 operates on 1024 byte blocks.
+typedef struct { u64 a[128]; } block;
static u32 min(u32 a, u32 b) { return a <= b ? a : b; }
static void load_block(block *b, const u8 bytes[1024])
{
- FOR (i, 0, 128) { b->a[i] = load64_le(bytes + i*8); }
+ FOR (i, 0, 128) {
+ b->a[i] = load64_le(bytes + i*8);
+ }
}
static void store_block(u8 bytes[1024], const block *b)
{
- FOR (i, 0, 128) { store64_le(bytes + i*8, b->a[i]); }
+ FOR (i, 0, 128) {
+ store64_le(bytes + i*8, b->a[i]);
+ }
}
-// type of copy_block() and xor_block()
-typedef void (*copy_fun) (block*, const block*);
static void copy_block(block *o,const block*in){FOR(i,0,128) o->a[i] = in->a[i];}
static void xor_block(block *o,const block*in){FOR(i,0,128) o->a[i]^= in->a[i];}
// Hash with a virtually unlimited digest size.
// Doesn't extract more entropy than the base hash function.
// Mainly used for filling a whole kilobyte block with pseudo-random bytes.
+// (One could use a stream cipher with a seed hash as the key, but
+// this would introduce another dependency — and point of failure.)
static void extended_hash(u8 *digest, u32 digest_size,
- const u8 *input , u32 input_size)
+ const u8 *input , u32 input_size)
{
crypto_blake2b_ctx ctx;
crypto_blake2b_general_init(&ctx, min(digest_size, 64), 0, 0);
static void g_copy(block *result, const block *x, const block *y)
{
block tmp;
- copy_block(&tmp, x); // tmp = X
- xor_block (&tmp, y); // tmp = X ^ Y = R
- copy_block(result, &tmp);// result = R
- g_rounds(&tmp); // tmp = Z
- xor_block(result, &tmp); // result = R ^ Z
+ copy_block(&tmp , x ); // tmp = X
+ xor_block (&tmp , y ); // tmp = X ^ Y = R
+ copy_block(result, &tmp); // result = R (only difference with g_xor)
+ g_rounds (&tmp); // tmp = Z
+ xor_block (result, &tmp); // result = R ^ Z
}
// The compression function G (xor version for subsequent passes)
static void g_xor(block *result, const block *x, const block *y)
{
block tmp;
- copy_block(&tmp, x); // tmp = X
- xor_block (&tmp, y); // tmp = X ^ Y = R
- xor_block(result, &tmp); // result = R ^ old
- g_rounds(&tmp); // tmp = Z
- xor_block(result, &tmp); // result = R ^ old ^ Z
+ copy_block(&tmp , x ); // tmp = X
+ xor_block (&tmp , y ); // tmp = X ^ Y = R
+ xor_block (result, &tmp); // result = R ^ old (only difference with g_copy)
+ g_rounds (&tmp); // tmp = Z
+ xor_block (result, &tmp); // result = R ^ old ^ Z
}
// unary version of the compression function.
xor_block(work_block, &tmp); // work_block = Z ^ R
}
+// Argon2i uses a kind of stream cipher to determine which reference
+// block it will take to synthesise the next block. This context holds
+// that stream's state. (It's very similar to Chacha20. The block b
+// is analogous to Chacha's own pool.)
typedef struct {
block b;
- u32 pass_number; u32 slice_number;
- u32 nb_blocks; u32 nb_iterations;
- u32 ctr; u32 offset;
+ u32 pass_number;
+ u32 slice_number;
+ u32 nb_blocks;
+ u32 nb_iterations;
+ u32 ctr;
+ u32 offset;
} gidx_ctx;
+// The block in the context will determine array indices. To avoid
+// timing attacks, it only depends on public information. No looking
+// at a previous block to seed the next. This makes offline attacks
+// easier, but timing attacks are the bigger threat in many settings.
static void gidx_refresh(gidx_ctx *ctx)
{
// seed the begining of the block...
ctx->ctr = 0;
// Offset from the begining of the segment. For the first slice
- // of the firs pass, we start at the *third* block, so the offset
+ // of the first pass, we start at the *third* block, so the offset
// starts at 2, not 0.
if (pass_number != 0 || slice_number != 0) {
ctx->offset = 0;
}
// Main algorithm
-void crypto_argon2i(u8 *tag, u32 tag_size,
+void crypto_argon2i(u8 *hash, u32 hash_size,
void *work_area, u32 nb_blocks, u32 nb_iterations,
const u8 *password, u32 password_size,
const u8 *salt, u32 salt_size,
crypto_blake2b_init(&ctx);
blake_update_32 (&ctx, 1 ); // p: number of threads
- blake_update_32 (&ctx, tag_size );
+ blake_update_32 (&ctx, hash_size );
blake_update_32 (&ctx, nb_blocks );
blake_update_32 (&ctx, nb_iterations);
blake_update_32 (&ctx, 0x13 ); // v: version number
// fill (then re-fill) the rest of the blocks
FOR (pass_number, 0, nb_iterations) {
- int first_pass = pass_number == 0;
+ int first_pass = pass_number == 0;
FOR (segment, 0, 4) {
gidx_ctx ctx;
}
}
}
- // hash the very last block with H' into the output tag
+ // hash the very last block with H' into the output hash
u8 final_block[1024];
store_block(final_block, blocks + (nb_blocks - 1));
- extended_hash(tag, tag_size, final_block, 1024);
+ extended_hash(hash, hash_size, final_block, 1024);
}
////////////////////////////////////
/// Arithmetic modulo 2^255 - 19 ///
////////////////////////////////////
// Taken from Supercop's ref10 implementation.
-// A bit bigger than TweetNaCl, about 8 times faster.
+// A bit bigger than TweetNaCl, over 4 times faster.
// field element
typedef i32 fe[10];
static void fe_0 (fe h) { FOR(i,0,10) h[i] = 0; }
-static void fe_1 (fe h) { h[0] = 1; FOR(i,1,10) h[i] = 0; }
+static void fe_1 (fe h) { h[0] = 1; FOR(i,1,10) h[i] = 0; }
static void fe_neg (fe h,const fe f) {FOR(i,0,10) h[i] = -f[i]; }
static void fe_add (fe h,const fe f,const fe g){FOR(i,0,10) h[i] = f[i] + g[i];}
static void fe_sub (fe h,const fe f,const fe g){FOR(i,0,10) h[i] = f[i] - g[i];}
}
}
-static u32 load24_le(const u8 s[3])
-{
- return (u32)s[0]
- | ((u32)s[1] << 8)
- | ((u32)s[2] << 16);
-}
-
static void fe_carry(fe h, i64 t[10])
{
i64 c0, c1, c2, c3, c4, c5, c6, c7, c8, c9;
static void fe_mul_small(fe h, const fe f, i32 g)
{
i64 t[10];
- FOR(i, 0, 10) { t[i] = f[i] * (i64) g; }
+ FOR(i, 0, 10) {
+ t[i] = f[i] * (i64) g;
+ }
fe_carry(h, t);
}
static void fe_mul121666(fe h, const fe f) { fe_mul_small(h, f, 121666); }
static void fe_tobytes(u8 s[32], const fe h)
{
i32 t[10];
- FOR (i, 0, 10) { t[i] = h[i]; }
-
+ FOR (i, 0, 10) {
+ t[i] = h[i];
+ }
i32 q = (19 * t[9] + (((i32) 1) << 24)) >> 25;
FOR (i, 0, 5) {
q += t[2*i ]; q >>= 26;
s[31] |= 64;
}
-static void x25519_ladder(const fe x1, fe x2, fe z2, fe x3, fe z3, const u8 scalar[32])
+static void x25519_ladder(const fe x1, fe x2, fe z2, fe x3, fe z3,
+ const u8 scalar[32])
{
// Montgomery ladder
// In projective coordinates, to avoid divisons: x = X / Z
fe_add(t0, t0, z3); fe_mul(z3, x1, z2); fe_mul(z2, t1, t0);
}
// last swap is necessary to compensate for the xor trick
+ // Note: after this swap, P3 == P2 + P1.
fe_cswap(x2, x3, swap);
fe_cswap(z2, z3, swap);
}
const u8 their_public_key[32])
{
// computes the scalar product
- fe x1, x2, z2, x3, z3;
+ fe x1;
fe_frombytes(x1, their_public_key);
// restrict the possible scalar values
- u8 e[32]; FOR (i, 0, 32) { e[i] = your_secret_key[i]; }
+ u8 e[32];
+ FOR (i, 0, 32) {
+ e[i] = your_secret_key[i];
+ }
trim_scalar(e);
// computes the actual scalar product (the result is in x2 and z2)
+ fe x2, z2, x3, z3;
x25519_ladder(x1, x2, z2, x3, z3, e);
// normalises the coordinates: x == X / Z
///////////////
// Point in a twisted Edwards curve,
-// in extended projective coordinates
+// in extended projective coordinates.
// x = X/Z, y = Y/Z, T = XY/Z
typedef struct { fe X; fe Y; fe Z; fe T; } ge;
static int ge_frombytes_neg(ge *h, const u8 s[32])
{
static const fe d = {
- -10913610,13857413,-15372611,6949391,114729,
- -8787816,-6275908,-3247719,-18696448,-12055116
+ -10913610, 13857413, -15372611, 6949391, 114729,
+ -8787816, -6275908, -3247719, -18696448, -12055116
} ;
static const fe sqrtm1 = {
- -32595792,-7943725,9377950,3500415,12389472,
- -272473,-25146209,-2005654,326686,11406482
+ -32595792, -7943725, 9377950, 3500415, 12389472,
+ -272473, -25146209, -2005654, 326686, 11406482
} ;
fe u, v, v3, vxx, check;
fe_frombytes(h->Y, s);
return 0;
}
-// for point additon
-static const fe D2 = { // - 2 * 121665 / 121666
- 0x2b2f159, 0x1a6e509, 0x22add7a, 0x0d4141d, 0x0038052,
- 0x0f3d130, 0x3407977, 0x19ce331, 0x1c56dff, 0x0901b67
-};
-
static void ge_add(ge *s, const ge *p, const ge *q)
{
+ static const fe D2 = { // - 2 * 121665 / 121666
+ 0x2b2f159, 0x1a6e509, 0x22add7a, 0x0d4141d, 0x0038052,
+ 0x0f3d130, 0x3407977, 0x19ce331, 0x1c56dff, 0x0901b67
+ };
fe a, b, c, d, e, f, g, h;
// A = (Y1-X1) * (Y2-X2)
// B = (Y1+X1) * (Y2+X2)
fe_mul(s->T, e, h); // T3 = E * H
}
+// Performing the scalar multiplication directly in Twisted Edwards
+// space would be simpler, but also slower. So we do it in Montgomery
+// space instead. The sign of the Y coordinate however gets lost in
+// translation, so we use a dirty trick to recover it.
static void ge_scalarmult(ge *p, const ge *q, const u8 scalar[32])
{
// sqrt(-486664)
// montgomery scalarmult
x25519_ladder(x1, x2, z2, x3, z3, scalar);
- // recover the y1 coordinate (Katsuyuki Okeya & Kouichi Sakurai, 2001)
+ // Recover the y coordinate (Katsuyuki Okeya & Kouichi Sakurai, 2001)
+ // Note the shameless reuse of x1: (x1, y1, z1) will correspond to
+ // what was originally (x2, z2).
fe_mul(t1, x1, z2); // t1 = x1 * z2
fe_add(t2, x2, t1); // t2 = x2 + t1
fe_sub(t3, x2, t1); // t3 = x2 − t1
carry = x[i] >> 8;
x[i] &= 255;
}
- FOR(i, 0, 32) { x[i] -= carry * L[i]; }
+ FOR(i, 0, 32) {
+ x[i] -= carry * L[i];
+ }
FOR(i, 0, 32) {
x[i+1] += x[i] >> 8;
r[i ] = x[i] & 255;
static void reduce(u8 r[64])
{
i64 x[64];
- FOR(i, 0, 64) x[i] = (u64) r[i];
- FOR(i, 0, 64) r[i] = 0;
+ FOR(i, 0, 64) {
+ x[i] = (u64) r[i];
+ r[i] = 0;
+ }
modL(r, x);
}
// hashes R || A || M, reduces it modulo L
-static void hash_ram(u8 k[64], const u8 R[32], const u8 A[32], const u8 *M, size_t M_size)
+static void hash_ram(u8 k[64], const u8 R[32], const u8 A[32],
+ const u8 *M, size_t M_size)
{
HASH_CTX ctx;
HASH_INIT (&ctx);
hash_ram(h_ram, signature, pk, message, message_size);
i64 s[64]; // s = r + h_ram * a
- FOR(i, 0, 32) s[i] = (u64) r[i];
- FOR(i, 32, 64) s[i] = 0;
- FOR(i, 0, 32) {
+ FOR(i, 0, 32) { s[i] = (u64) r[i]; }
+ FOR(i, 32, 64) { s[i] = 0; }
+ FOR(i, 0, 32) {
FOR(j, 0, 32) {
s[i+j] += h_ram[i] * (u64) a[j];
}
{
ge A, p, sB, diff;
u8 h_ram[64], R_check[32];
- if (ge_frombytes_neg(&A, public_key)) { return -1; } // -A
+ if (ge_frombytes_neg(&A, public_key)) { // -A
+ return -1;
+ }
hash_ram(h_ram, signature, public_key, message, message_size);
- ge_scalarmult(&p, &A, h_ram); // p = -A*h_ram
+ ge_scalarmult(&p, &A, h_ram); // p = -A*h_ram
ge_scalarmult_base(&sB, signature + 32);
- ge_add(&diff, &p, &sB); // diff = s - A*h_ram
+ ge_add(&diff, &p, &sB); // diff = s - A*h_ram
ge_tobytes(R_check, &diff);
return crypto_memcmp(signature, R_check, 32); // R == s - A*h_ram ? OK : fail
}
/// Authenticated encryption ///
////////////////////////////////
static void authenticate2(u8 mac[16] , const u8 auth_key[32],
- const u8 *t1, size_t size1,
- const u8 *t2, size_t size2)
+ const u8 *t1, size_t size1,
+ const u8 *t2, size_t size2)
{
crypto_poly1305_ctx a_ctx;
crypto_poly1305_init (&a_ctx, auth_key);
}
void crypto_aead_lock(u8 mac[16],
- u8 *ciphertext,
+ u8 *cipher_text,
const u8 key[32],
const u8 nonce[24],
- const u8 *ad , size_t ad_size,
- const u8 *plaintext, size_t text_size)
+ const u8 *ad , size_t ad_size,
+ const u8 *plain_text, size_t text_size)
{ // encrypt then mac
u8 auth_key[32];
crypto_chacha_ctx e_ctx;
crypto_chacha20_x_init (&e_ctx, key, nonce);
crypto_chacha20_stream (&e_ctx, auth_key, 32);
- crypto_chacha20_encrypt(&e_ctx, ciphertext, plaintext, text_size);
- authenticate2(mac, auth_key, ad, ad_size, ciphertext, text_size);
+ crypto_chacha20_encrypt(&e_ctx, cipher_text, plain_text, text_size);
+ authenticate2(mac, auth_key, ad, ad_size, cipher_text, text_size);
}
-int crypto_aead_unlock(u8 *plaintext,
+int crypto_aead_unlock(u8 *plain_text,
const u8 key[32],
const u8 nonce[24],
const u8 mac[16],
- const u8 *ad , size_t ad_size,
- const u8 *ciphertext, size_t text_size)
+ const u8 *ad , size_t ad_size,
+ const u8 *cipher_text, size_t text_size)
{
u8 auth_key[32], real_mac[16];
crypto_chacha_ctx e_ctx;
crypto_chacha20_x_init(&e_ctx, key, nonce);
crypto_chacha20_stream(&e_ctx, auth_key, 32);
- authenticate2(real_mac, auth_key, ad, ad_size, ciphertext, text_size);
- if (crypto_memcmp(real_mac, mac, 16)) { return -1; } // reject forgeries
- crypto_chacha20_encrypt(&e_ctx, plaintext, ciphertext, text_size);
+ authenticate2(real_mac, auth_key, ad, ad_size, cipher_text, text_size);
+ if (crypto_memcmp(real_mac, mac, 16)) {
+ return -1; // reject forgeries
+ }
+ crypto_chacha20_encrypt(&e_ctx, plain_text, cipher_text, text_size);
return 0;
}
void crypto_lock(u8 mac[16],
- u8 *ciphertext,
+ u8 *cipher_text,
const u8 key[32],
const u8 nonce[24],
- const u8 *plaintext, size_t text_size)
+ const u8 *plain_text, size_t text_size)
{
- crypto_aead_lock(mac, ciphertext, key, nonce, 0, 0, plaintext, text_size);
+ crypto_aead_lock(mac, cipher_text, key, nonce, 0, 0, plain_text, text_size);
}
-int crypto_unlock(u8 *plaintext,
+int crypto_unlock(u8 *plain_text,
const u8 key[32],
const u8 nonce[24],
const u8 mac[16],
- const u8 *ciphertext, size_t text_size)
+ const u8 *cipher_text, size_t text_size)
{
- return crypto_aead_unlock(plaintext, key, nonce, mac, 0, 0,
- ciphertext, text_size);
+ return crypto_aead_unlock(plain_text, key, nonce, mac, 0, 0,
+ cipher_text, text_size);
}