From: Loup Vaillant Date: Sun, 25 Feb 2018 20:00:46 +0000 (+0100) Subject: More readable and more flexible loading code X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=06dc2f3fd452f5d62cf4f318ab23945fbd0d61a4;p=Monocypher.git More readable and more flexible loading code The loading code for Chacha20, Poly1305, Blake2b, and SHA-512 was a bit ad-hoc. This made it a bit impenetrable, as well as error prone. Chacha20 in particular was harder than it should be to adapt to faster implementations that proceed by several blocks at a time. So was Poly1305, I think. The loading code has been modified to conform to the following pattern: 1. Align ourselves with block boundaries 2. Process the message block by block 3. Remaining bytes - The last section just calls general purpose update code. It's the only one that's mandatory. - The first section calls the same general purpose update code, with just enough input to reach the next block boundary. It must be present whenever the second section is. - The second section does optimised block-by-block update. It needs the first section to ensure alignment. Each section but the last updates the input pointers and lengths, allowing later sections to assume they were the first. Tests were performed with sections 1 2 3, 1 3, and 3 alone. They all yield the same, correct results. We could write an equivalence proof, but the property-based tests were designed to catch mistakes in the loading code in the first place. Maybe not worth the trouble. --- diff --git a/src/monocypher.c b/src/monocypher.c index eb97830..b15a414 100644 --- a/src/monocypher.c +++ b/src/monocypher.c @@ -29,6 +29,8 @@ typedef int32_t i32; typedef int64_t i64; typedef uint64_t u64; +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + static u32 load24_le(const u8 s[3]) { return (u32)s[0] @@ -184,6 +186,25 @@ void crypto_chacha20_H(u8 out[32], const u8 key[32], const u8 in[16]) WIPE_BUFFER(buffer); } +static void chacha20_encrypt(crypto_chacha_ctx *ctx, + u8 *cipher_text, + const u8 *plain_text, + size_t text_size) +{ + FOR (i, 0, text_size) { + if (ctx->pool_idx == 64) { + chacha20_refill_pool(ctx); + } + u8 plain = 0; + if (plain_text != 0) { + plain = *plain_text; + plain_text++; + } + *cipher_text = chacha20_pool_byte(ctx) ^ plain; + cipher_text++; + } +} + void crypto_chacha20_init(crypto_chacha_ctx *ctx, const u8 key[32], const u8 nonce[8]) @@ -215,23 +236,17 @@ void crypto_chacha20_encrypt(crypto_chacha_ctx *ctx, const u8 *plain_text, size_t text_size) { - // Align ourselves with a block - while ((ctx->pool_idx & 63) != 0 && text_size > 0) { - u8 stream = chacha20_pool_byte(ctx); - u8 plain = 0; - if (plain_text != 0) { - plain = *plain_text; - plain_text++; - } - *cipher_text = stream ^ plain; - text_size--; - cipher_text++; + // Align ourselves with block boundaries + size_t align = MIN(-ctx->pool_idx & 63, text_size); + chacha20_encrypt(ctx, cipher_text, plain_text, align); + if (plain_text != 0) { + plain_text += align; } + cipher_text += align; + text_size -= align; - // Main processing by 64 byte chunks - size_t nb_blocks = text_size >> 6; - size_t remainder = text_size & 63; - FOR (i, 0, nb_blocks) { + // Process the message block by block + FOR (i, 0, text_size >> 6) { // number of blocks chacha20_refill_pool(ctx); if (plain_text != 0) { FOR (j, 0, 16) { @@ -248,20 +263,10 @@ void crypto_chacha20_encrypt(crypto_chacha_ctx *ctx, } ctx->pool_idx = 64; } + text_size &= 63; - // Remaining input, byte by byte - FOR (i, 0, remainder) { - if (ctx->pool_idx == 64) { - chacha20_refill_pool(ctx); - } - u8 plain = 0; - if (plain_text != 0) { - plain = *plain_text; - plain_text++; - } - *cipher_text = chacha20_pool_byte(ctx) ^ plain; - cipher_text++; - } + // remaining bytes + 
chacha20_encrypt(ctx, cipher_text, plain_text, text_size); } void crypto_chacha20_stream(crypto_chacha_ctx *ctx, @@ -342,6 +347,18 @@ static void poly_take_input(crypto_poly1305_ctx *ctx, u8 input) ctx->c_idx++; } +static void poly_update(crypto_poly1305_ctx *ctx, + const u8 *message, size_t message_size) +{ + FOR (i, 0, message_size) { + poly_take_input(ctx, message[i]); + if (ctx->c_idx == 16) { + poly_block(ctx); + poly_clear_c(ctx); + } + } +} + void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const u8 key[32]) { // Initial hash is zero @@ -360,19 +377,14 @@ void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const u8 key[32]) void crypto_poly1305_update(crypto_poly1305_ctx *ctx, const u8 *message, size_t message_size) { - // Align ourselves with a block - while ((ctx->c_idx & 15) != 0 && message_size > 0) { - poly_take_input(ctx, *message); - message++; - message_size--; - } - if (ctx->c_idx == 16) { - poly_block(ctx); - poly_clear_c(ctx); - } - // Process the input block by block + // Align ourselves with block boundaries + size_t align = MIN(-ctx->c_idx & 15, message_size); + poly_update(ctx, message, align); + message += align; + message_size -= align; + + // Process the message block by block size_t nb_blocks = message_size >> 4; - size_t remainder = message_size & 15; FOR (i, 0, nb_blocks) { ctx->c[0] = load32_le(message + 0); ctx->c[1] = load32_le(message + 4); @@ -384,11 +396,10 @@ void crypto_poly1305_update(crypto_poly1305_ctx *ctx, if (nb_blocks > 0) { poly_clear_c(ctx); } + message_size &= 15; - // Input the remaining bytes - FOR (i, 0, remainder) { - poly_take_input(ctx, message[i]); - } + // remaining bytes + poly_update(ctx, message, message_size); } void crypto_poly1305_final(crypto_poly1305_ctx *ctx, u8 mac[16]) @@ -453,14 +464,6 @@ static void blake2b_incr(crypto_blake2b_ctx *ctx) } } -static void blake2b_set_input(crypto_blake2b_ctx *ctx, u8 input) -{ - size_t word = ctx->input_idx >> 3; - size_t byte = ctx->input_idx & 7; - 
ctx->input[word] |= (u64)input << (byte * 8); - ctx->input_idx++; -} - static void blake2b_compress(crypto_blake2b_ctx *ctx, int is_last_block) { static const u8 sigma[12][16] = { @@ -518,12 +521,12 @@ static void blake2b_compress(crypto_blake2b_ctx *ctx, int is_last_block) ctx->hash[7] ^= v7 ^ v15; } -static void blake2b_reset_input(crypto_blake2b_ctx *ctx) +static void blake2b_set_input(crypto_blake2b_ctx *ctx, u8 input, size_t index) { - FOR (i, 0, 16) { - ctx->input[i] = 0; - } - ctx->input_idx = 0; + size_t word = index >> 3; + size_t byte = index & 7; + ctx->input[word] &= ~((u64)0xff << (byte << 3)); + ctx->input[word] |= (u64)input << (byte << 3); } static void blake2b_end_block(crypto_blake2b_ctx *ctx) @@ -531,14 +534,17 @@ static void blake2b_end_block(crypto_blake2b_ctx *ctx) if (ctx->input_idx == 128) { // If buffer is full, blake2b_incr(ctx); // update the input offset blake2b_compress(ctx, 0); // and compress the (not last) block - blake2b_reset_input(ctx); + ctx->input_idx = 0; } } -static void blake2b_fill_block(crypto_blake2b_ctx *ctx, const u8 message[128]) +static void blake2b_update(crypto_blake2b_ctx *ctx, + const u8 *message, size_t message_size) { - FOR (j, 0, 16) { - ctx->input[j] = load64_le(message + j*8); + FOR (i, 0, message_size) { + blake2b_end_block(ctx); + blake2b_set_input(ctx, message[i], ctx->input_idx); + ctx->input_idx++; } } @@ -554,12 +560,15 @@ void crypto_blake2b_general_init(crypto_blake2b_ctx *ctx, size_t hash_size, ctx->input_offset[0] = 0; // begining of the input, no offset ctx->input_offset[1] = 0; // begining of the input, no offset ctx->hash_size = hash_size; // remember the hash size we want - blake2b_reset_input(ctx); // clear the input buffer + ctx->input_idx = 0; - // if there is a key, the first block is that key + // if there is a key, the first block is that key (padded with zeroes) if (key_size > 0) { - crypto_blake2b_update(ctx, key, key_size); - ctx->input_idx = 128; + u8 padded_key[128] = {0}; + FOR (i, 0, 
key_size) { + padded_key[i] = key[i]; + } + crypto_blake2b_update(ctx, padded_key, 128); } } @@ -571,42 +580,33 @@ void crypto_blake2b_init(crypto_blake2b_ctx *ctx) void crypto_blake2b_update(crypto_blake2b_ctx *ctx, const u8 *message, size_t message_size) { - // Align ourselves with blocks - while ((ctx->input_idx & 127) != 0 && message_size > 0) { - blake2b_set_input(ctx, *message); - message++; - message_size--; - } + // Align ourselves with block boundaries + size_t align = MIN(-ctx->input_idx & 127, message_size); + blake2b_update(ctx, message, align); + message += align; + message_size -= align; - // Process the input one block at a time - size_t nb_blocks = message_size >> 7; - size_t remainder = message_size & 127; - if (nb_blocks > 0) { - // first block + // Process the message block by block + FOR (i, 0, message_size >> 7) { // number of blocks blake2b_end_block(ctx); - blake2b_fill_block(ctx, message); + FOR (j, 0, 16) { + ctx->input[j] = load64_le(message + j*8); + } message += 128; ctx->input_idx = 128; - // subsequent blocks - FOR (i, 0, nb_blocks - 1) { - blake2b_incr(ctx); - blake2b_compress(ctx, 0); - blake2b_fill_block(ctx, message); - message += 128; - } } + message_size &= 127; - // Load the remainder - if (remainder != 0) { - blake2b_end_block(ctx); - } - FOR (i, 0, remainder) { - blake2b_set_input(ctx, message[i]); - } + // remaining bytes + blake2b_update(ctx, message, message_size); } void crypto_blake2b_final(crypto_blake2b_ctx *ctx, u8 *hash) { + // Pad the end of the block with zeroes + FOR (i, ctx->input_idx, 128) { + blake2b_set_input(ctx, 0, i); + } blake2b_incr(ctx); // update the input offset blake2b_compress(ctx, -1); // compress the last block size_t nb_words = ctx->hash_size >> 3; @@ -651,8 +651,6 @@ static void wipe_block(block *b) } } -static u32 min(u32 a, u32 b) { return a <= b ? a : b; } - // updates a blake2 hash with a 32 bit word, little endian. 
static void blake_update_32(crypto_blake2b_ctx *ctx, u32 input) { @@ -688,7 +686,7 @@ static void extended_hash(u8 *digest, u32 digest_size, const u8 *input , u32 input_size) { crypto_blake2b_ctx ctx; - crypto_blake2b_general_init(&ctx, min(digest_size, 64), 0, 0); + crypto_blake2b_general_init(&ctx, MIN(digest_size, 64), 0, 0); blake_update_32 (&ctx, digest_size); crypto_blake2b_update (&ctx, input, input_size); crypto_blake2b_final (&ctx, digest); diff --git a/src/optional/sha512.c b/src/optional/sha512.c index 8e7d8a4..ae64595 100644 --- a/src/optional/sha512.c +++ b/src/optional/sha512.c @@ -2,6 +2,7 @@ #define FOR(i, min, max) for (size_t i = min; i < max; i++) #define WIPE_CTX(ctx) crypto_wipe(ctx , sizeof(*(ctx))) +#define MIN(a, b) ((a) <= (b) ? (a) : (b)) typedef uint8_t u8; typedef uint64_t u64; @@ -124,6 +125,16 @@ static void sha512_end_block(crypto_sha512_ctx *ctx) } } +static void sha512_update(crypto_sha512_ctx *ctx, + const u8 *message, size_t message_size) +{ + FOR (i, 0, message_size) { + sha512_set_input(ctx, message[i]); + ctx->input_idx++; + sha512_end_block(ctx); + } +} + void crypto_sha512_init(crypto_sha512_ctx *ctx) { ctx->hash[0] = 0x6a09e667f3bcc908; @@ -142,31 +153,25 @@ void crypto_sha512_init(crypto_sha512_ctx *ctx) void crypto_sha512_update(crypto_sha512_ctx *ctx, const u8 *message, size_t message_size) { - // Align ourselves with 8 byte words - while (ctx->input_idx % 8 != 0 && message_size > 0) { - sha512_set_input(ctx, *message); - ctx->input_idx++; - message++; - message_size--; - } - sha512_end_block(ctx); - - // Main processing by 8 byte chunks (much faster) - size_t nb_words = message_size / 8; - size_t remainder = message_size % 8; - FOR (i, 0, nb_words) { - ctx->input[ctx->input_idx / 8] = load64_be(message); - message += 8; - ctx->input_idx += 8; + // Align ourselves with block boundaries + size_t align = MIN(-ctx->input_idx & 127, message_size); + sha512_update(ctx, message, align); + message += align; + message_size -= 
align; + + // Process the message block by block + FOR (i, 0, message_size / 128) { // number of blocks + FOR (j, 0, 16) { + ctx->input[j] = load64_be(message + j*8); + } + message += 128; + ctx->input_idx += 128; sha512_end_block(ctx); } + message_size &= 127; - // Remaining processing byte by byte - FOR (i, 0, remainder) { - sha512_set_input(ctx, *message); - message++; - ctx->input_idx++; - } + // remaining bytes + sha512_update(ctx, message, message_size); } void crypto_sha512_final(crypto_sha512_ctx *ctx, u8 hash[64])