From: Loup Vaillant Date: Fri, 24 Mar 2023 00:51:10 +0000 (+0100) Subject: Revert "Modify Blake2b context input to byte buffer" X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=94d0f7031874bf532d799653777d7c0358f6b494;p=Monocypher.git Revert "Modify Blake2b context input to byte buffer" This reverts commit 310aab8ddf9b0a31ccc74c06ece8098769cf1231. Turns out I failed to optimise Blake2b, and it was faster before. --- diff --git a/src/monocypher.c b/src/monocypher.c index 7fdebb0..03a53f6 100644 --- a/src/monocypher.c +++ b/src/monocypher.c @@ -494,7 +494,7 @@ static void blake2b_compress(crypto_blake2b_ctx *ctx, int is_last_block) u64 v7 = ctx->hash[7]; u64 v15 = iv[7]; // mangle work vector - u64 *input = (u64*)ctx->input; + u64 *input = ctx->input; #define BLAKE2_G(a, b, c, d, x, y) \ a += b + x; d = rotr64(d ^ a, 32); \ c += d; b = rotr64(b ^ c, 24); \ @@ -538,11 +538,14 @@ void crypto_blake2b_keyed_init(crypto_blake2b_ctx *ctx, size_t hash_size, ctx->input_offset[1] = 0; // beginning of the input, no offset ctx->hash_size = hash_size; ctx->input_idx = 0; - ZERO((u64*)ctx->input, 16); + ZERO(ctx->input, 16); // if there is a key, the first block is that key (padded with zeroes) if (key_size > 0) { - COPY(ctx->input, key, key_size); + u8 key_block[128] = {0}; + COPY(key_block, key, key_size); + // same as calling crypto_blake2b_update(ctx, key_block , 128) + load64_le_buf(ctx->input, key_block, 16); ctx->input_idx = 128; } } @@ -560,22 +563,35 @@ void crypto_blake2b_update(crypto_blake2b_ctx *ctx, return; } - // Align with block boundaries (magic compiler makes it fast) - if ((ctx->input_idx & 127) != 0) { - size_t nb_bytes = MIN(gap(ctx->input_idx, 128), message_size); - COPY(ctx->input + ctx->input_idx, message, nb_bytes); + // Align with word boundaries + if ((ctx->input_idx & 7) != 0) { + size_t nb_bytes = MIN(gap(ctx->input_idx, 8), message_size); + size_t word = ctx->input_idx >> 3; + size_t byte = ctx->input_idx & 7; + FOR (i, 0, nb_bytes) { + ctx->input[word] |= (u64)message[i] << ((byte + i) << 3); + } ctx->input_idx += nb_bytes; message += nb_bytes; message_size -= nb_bytes; } + // Align with block boundaries (faster than byte by byte) + if ((ctx->input_idx & 127) != 0) { + size_t nb_words = MIN(gap(ctx->input_idx, 128), message_size) >> 3; + load64_le_buf(ctx->input + (ctx->input_idx >> 3), message, nb_words); + ctx->input_idx += nb_words << 3; + message += nb_words << 3; + message_size -= nb_words << 3; + } + // Process block by block size_t nb_blocks = message_size >> 7; FOR (i, 0, nb_blocks) { if (ctx->input_idx == 128) { blake2b_compress(ctx, 0); } - COPY(ctx->input, message, 128); + load64_le_buf(ctx->input, message, 16); message += 128; ctx->input_idx = 128; } @@ -588,18 +604,22 @@ void crypto_blake2b_update(crypto_blake2b_ctx *ctx, ctx->input_idx = 0; } if (ctx->input_idx == 0) { - ZERO(ctx->input, 128); + ZERO(ctx->input, 16); } // Fill remaining words (faster than byte by byte) size_t nb_words = message_size >> 3; - COPY(ctx->input, message, nb_words << 3); + load64_le_buf(ctx->input, message, nb_words); ctx->input_idx += nb_words << 3; message += nb_words << 3; message_size -= nb_words << 3; // Fill remaining bytes - COPY(ctx->input + ctx->input_idx, message, message_size); - ctx->input_idx += message_size; + FOR (i, 0, message_size) { + size_t word = ctx->input_idx >> 3; + size_t byte = ctx->input_idx & 7; + ctx->input[word] |= (u64)message[i] << (byte << 3); + ctx->input_idx++; + } } } diff --git a/src/monocypher.h b/src/monocypher.h index c73b4d4..cf635e8 100644 --- a/src/monocypher.h +++ b/src/monocypher.h @@ -136,7 +136,7 @@ typedef struct { // for they may change without notice. uint64_t hash[8]; uint64_t input_offset[2]; - uint8_t input[128]; + uint64_t input[16]; size_t input_idx; size_t hash_size; } crypto_blake2b_ctx; diff --git a/tests/speed/speed.c b/tests/speed/speed.c index 1a44c95..1a7e9dd 100644 --- a/tests/speed/speed.c +++ b/tests/speed/speed.c @@ -122,22 +122,6 @@ static u64 blake2b_small(void) TIMING_END; } -static u64 blake2b_blocks(void) -{ - u8 hash[64]; - RANDOM_INPUT(input, 32); - - TIMING_START { - crypto_blake2b_ctx ctx; - crypto_blake2b_init(&ctx, 64); - FOR (i, 0, 1000) { - crypto_blake2b_update(&ctx, input, 32); - } - crypto_blake2b_final(&ctx, hash); - } - TIMING_END; -} - static u64 sha512(void) { u8 hash[64]; @@ -272,7 +256,6 @@ int main() print("Auth'd encryption ",authenticated()*MUL ,"megabytes per second"); print("BLAKE2b ",blake2b() *MUL ,"megabytes per second"); print("BLAKE2b (small) ",blake2b_small() ,"cycles per second"); - print("BLAKE2b (32B blocks)",blake2b_blocks() ,"cycles per second"); print("SHA-512 ",sha512() *MUL ,"megabytes per second"); print("SHA-512 (small) ",sha512_small() ,"cycles per second"); print("Argon2i, 3 passes ",argon2i() *MUL ,"megabytes per second");