From: Loup Vaillant Date: Sun, 6 Aug 2017 09:11:23 +0000 (+0200) Subject: added poly1305-donna speed benchmark X-Git-Url: https://git.codecow.com/?a=commitdiff_plain;h=dd85828f5a45614fcc5c96b62b9708f139b42e72;p=Monocypher.git added poly1305-donna speed benchmark --- diff --git a/makefile b/makefile index 1ca8bbd..df5043d 100644 --- a/makefile +++ b/makefile @@ -64,12 +64,17 @@ sodium: tests/sodium.c bin/rename_monocypher.o bin/rename_sha512.o $(CC) $(CFLAGS) -o $@ $^ $(C_SODIUM_FLAGS) $(LD_SODIUM_FLAGS) # Speed benchmark -speed: tests/speed.c bin/rename_monocypher.o bin/rename_sha512.o bin/tweetnacl.o +speed: tests/speed.c bin/rename_monocypher.o bin/rename_sha512.o bin/tweetnacl.o bin/poly-donna.o $(CC) $(CFLAGS) -o $@ $^ $(C_SODIUM_FLAGS) $(LD_SODIUM_FLAGS) bin/tweetnacl.o: tests/tweetnacl/tweetnacl.c tests/tweetnacl/tweetnacl.h $(CC) $(CFLAGS) -o $@ -c $< +bin/poly-donna.o: tests/poly1305-donna/poly1305-donna.c \ + tests/poly1305-donna/poly1305-donna.h \ + tests/poly1305-donna/poly1305-donna-32.h + $(CC) $(CFLAGS) -o $@ -c $< -DPOLY1305_32BIT + # Test edDSA/blake2b by comparing with the donna implementation # Note: we're using Blake2b, the default hash for monocypher edDSA donna: tests/donna.c bin/classic_monocypher.o bin/donna.o diff --git a/tests/poly1305-donna/poly1305-donna-32.h b/tests/poly1305-donna/poly1305-donna-32.h new file mode 100644 index 0000000..6a570f0 --- /dev/null +++ b/tests/poly1305-donna/poly1305-donna-32.h @@ -0,0 +1,219 @@ +/* + poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition +*/ + +#if defined(_MSC_VER) + #define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) + #define POLY1305_NOINLINE __attribute__((noinline)) +#else + #define POLY1305_NOINLINE +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(size_t) + 14*sizeof(unsigned long) */ +typedef struct poly1305_state_internal_t { + unsigned long r[5]; + unsigned long h[5]; + unsigned long pad[4]; + size_t leftover; + unsigned char buffer[poly1305_block_size]; + unsigned char final; +} poly1305_state_internal_t; + +/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ +static unsigned long +U8TO32(const unsigned char *p) { + return + (((unsigned long)(p[0] & 0xff) ) | + ((unsigned long)(p[1] & 0xff) << 8) | + ((unsigned long)(p[2] & 0xff) << 16) | + ((unsigned long)(p[3] & 0xff) << 24)); +} + +/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ +static void +U32TO8(unsigned char *p, unsigned long v) { + p[0] = (v ) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; +} + +void +poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = (U8TO32(&key[ 0]) ) & 0x3ffffff; + st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03; + st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff; + st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff; + st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + + /* save pad for later */ + st->pad[0] = U8TO32(&key[16]); + st->pad[1] = U8TO32(&key[20]); + st->pad[2] = U8TO32(&key[24]); + st->pad[3] = U8TO32(&key[28]); + + st->leftover = 0; + st->final = 0; +} + +static void +poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, size_t bytes) { + const unsigned long hibit = (st->final) ? 0 : (1UL << 24); /* 1 << 128 */ + unsigned long r0,r1,r2,r3,r4; + unsigned long s1,s2,s3,s4; + unsigned long h0,h1,h2,h3,h4; + unsigned long long d0,d1,d2,d3,d4; + unsigned long c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + while (bytes >= poly1305_block_size) { + /* h += m[i] */ + h0 += (U8TO32(m+ 0) ) & 0x3ffffff; + h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff; + h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff; + h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff; + h4 += (U8TO32(m+12) >> 8) | hibit; + + /* h *= r */ + d0 = ((unsigned long long)h0 * r0) + ((unsigned long long)h1 * s4) + ((unsigned long long)h2 * s3) + ((unsigned long long)h3 * s2) + ((unsigned long long)h4 * s1); + d1 = ((unsigned long long)h0 * r1) + ((unsigned long long)h1 * r0) + ((unsigned long long)h2 * s4) + ((unsigned long long)h3 * s3) + ((unsigned long long)h4 * s2); + d2 = ((unsigned long long)h0 * r2) + ((unsigned long long)h1 * r1) + ((unsigned long long)h2 * r0) + ((unsigned long long)h3 * s4) + ((unsigned long long)h4 * s3); + d3 = ((unsigned long long)h0 * r3) + ((unsigned long long)h1 * r2) + ((unsigned long long)h2 * r1) + ((unsigned long long)h3 * r0) + ((unsigned long long)h4 * s4); + d4 = ((unsigned long long)h0 * r4) + ((unsigned long long)h1 * r3) + ((unsigned long long)h2 * r2) + ((unsigned long long)h3 * r1) + ((unsigned long long)h4 * r0); + + /* (partial) h %= p */ + c = (unsigned long)(d0 >> 26); h0 = (unsigned long)d0 & 0x3ffffff; + d1 += c; c = (unsigned long)(d1 >> 26); h1 = (unsigned long)d1 & 0x3ffffff; + d2 += c; c = (unsigned long)(d2 >> 26); h2 = (unsigned long)d2 & 0x3ffffff; + d3 += c; c = (unsigned long)(d3 >> 26); h3 = (unsigned long)d3 & 0x3ffffff; + d4 += c; c = (unsigned long)(d4 >> 26); h4 = (unsigned long)d4 & 0x3ffffff; + h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; + h1 += c; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; +} + +POLY1305_NOINLINE void +poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + unsigned long h0,h1,h2,h3,h4,c; + unsigned long g0,g1,g2,g3,g4; + unsigned long long f; + unsigned long mask; + + /* process the remaining block */ + if (st->leftover) { + size_t i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) + st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + c = h1 >> 26; h1 = h1 & 0x3ffffff; + h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff; + h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff; + h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff; + h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff; + g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff; + g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff; + g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff; + g4 = h4 + c - (1UL << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0 ) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (unsigned long long)h0 + st->pad[0] ; h0 = (unsigned long)f; + f = (unsigned long long)h1 + st->pad[1] + (f >> 32); h1 = (unsigned long)f; + f = (unsigned long long)h2 + st->pad[2] + (f >> 32); h2 = (unsigned long)f; + f = (unsigned long long)h3 + st->pad[3] + (f >> 32); h3 = (unsigned long)f; + + U32TO8(mac + 0, h0); + U32TO8(mac + 4, h1); + U32TO8(mac + 8, h2); + U32TO8(mac + 12, h3); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->r[3] = 0; + st->r[4] = 0; + st->pad[0] = 0; + st->pad[1] = 0; + st->pad[2] = 0; + st->pad[3] = 0; +} + diff --git a/tests/poly1305-donna/poly1305-donna.c b/tests/poly1305-donna/poly1305-donna.c new file mode 100644 index 0000000..c1e3c74 --- /dev/null +++ b/tests/poly1305-donna/poly1305-donna.c @@ -0,0 +1,201 @@ +#include "poly1305-donna.h" + +#if defined(POLY1305_8BIT) +#include "poly1305-donna-8.h" +#elif defined(POLY1305_16BIT) +#include "poly1305-donna-16.h" +#elif defined(POLY1305_32BIT) +#include "poly1305-donna-32.h" +#elif defined(POLY1305_64BIT) +#include "poly1305-donna-64.h" +#else + +/* auto detect between 32bit / 64bit */ +#define HAS_SIZEOF_INT128_64BIT (defined(__SIZEOF_INT128__) && defined(__LP64__)) +#define HAS_MSVC_64BIT (defined(_MSC_VER) && defined(_M_X64)) +#define HAS_GCC_4_4_64BIT (defined(__GNUC__) && defined(__LP64__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)))) + +#if (HAS_SIZEOF_INT128_64BIT || HAS_MSVC_64BIT || HAS_GCC_4_4_64BIT) +#include "poly1305-donna-64.h" +#else +#include "poly1305-donna-32.h" +#endif + +#endif + +void +poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + size_t i; + + /* handle leftover */ + if (st->leftover) { + size_t want = (poly1305_block_size - st->leftover); + if (want > bytes) + want = bytes; + for (i = 0; i < want; i++) + st->buffer[st->leftover + i] = m[i]; + bytes -= want; + m += want; + st->leftover += want; + if (st->leftover < poly1305_block_size) + return; + poly1305_blocks(st, st->buffer, poly1305_block_size); + st->leftover = 0; + } + + /* process full blocks */ + if (bytes >= poly1305_block_size) { + size_t want = (bytes & ~(poly1305_block_size - 1)); + poly1305_blocks(st, m, want); + m += want; + bytes -= want; + } + + /* store leftover */ + if (bytes) { + for (i = 0; i < bytes; i++) + st->buffer[st->leftover + i] = m[i]; + st->leftover += bytes; + } +} + +void +poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]) { + poly1305_context ctx; + poly1305_init(&ctx, key); + poly1305_update(&ctx, m, bytes); + poly1305_finish(&ctx, mac); +} + +int +poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]) { + size_t i; + unsigned int dif = 0; + for (i = 0; i < 16; i++) + dif |= (mac1[i] ^ mac2[i]); + dif = (dif - 1) >> ((sizeof(unsigned int) * 8) - 1); + return (dif & 1); +} + + +/* test a few basic operations */ +int +poly1305_power_on_self_test(void) { + /* example from nacl */ + static const unsigned char nacl_key[32] = { + 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91, + 0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25, + 0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65, + 0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80, + }; + + static const unsigned char nacl_msg[131] = { + 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73, + 0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce, + 0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4, + 0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a, + 0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b, + 0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72, + 0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2, + 0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38, + 0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a, + 0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae, + 0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea, + 0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda, + 0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde, + 0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3, + 0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6, + 0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74, + 0xe3,0x55,0xa5 + }; + + static const unsigned char nacl_mac[16] = { + 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5, + 0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 + }; + + /* generates a final value of (2^130 - 2) == 3 */ + static const unsigned char wrap_key[32] = { + 0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + }; + + static const unsigned char wrap_msg[16] = { + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff + }; + + static const unsigned char wrap_mac[16] = { + 0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + }; + + /* + mac of the macs of messages of length 0 to 256, where the key and messages + have all their values set to the length + */ + static const unsigned char total_key[32] = { + 0x01,0x02,0x03,0x04,0x05,0x06,0x07, + 0xff,0xfe,0xfd,0xfc,0xfb,0xfa,0xf9, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff + }; + + static const unsigned char total_mac[16] = { + 0x64,0xaf,0xe2,0xe8,0xd6,0xad,0x7b,0xbd, + 0xd2,0x87,0xf9,0x7c,0x44,0x62,0x3d,0x39 + }; + + poly1305_context ctx; + poly1305_context total_ctx; + unsigned char all_key[32]; + unsigned char all_msg[256]; + unsigned char mac[16]; + size_t i, j; + int result = 1; + + for (i = 0; i < sizeof(mac); i++) + mac[i] = 0; + poly1305_auth(mac, nacl_msg, sizeof(nacl_msg), nacl_key); + result &= poly1305_verify(nacl_mac, mac); + + for (i = 0; i < sizeof(mac); i++) + mac[i] = 0; + poly1305_init(&ctx, nacl_key); + poly1305_update(&ctx, nacl_msg + 0, 32); + poly1305_update(&ctx, nacl_msg + 32, 64); + poly1305_update(&ctx, nacl_msg + 96, 16); + poly1305_update(&ctx, nacl_msg + 112, 8); + poly1305_update(&ctx, nacl_msg + 120, 4); + poly1305_update(&ctx, nacl_msg + 124, 2); + poly1305_update(&ctx, nacl_msg + 126, 1); + poly1305_update(&ctx, nacl_msg + 127, 1); + poly1305_update(&ctx, nacl_msg + 128, 1); + poly1305_update(&ctx, nacl_msg + 129, 1); + poly1305_update(&ctx, nacl_msg + 130, 1); + poly1305_finish(&ctx, mac); + result &= poly1305_verify(nacl_mac, mac); + + for (i = 0; i < sizeof(mac); i++) + mac[i] = 0; + poly1305_auth(mac, wrap_msg, sizeof(wrap_msg), wrap_key); + result &= poly1305_verify(wrap_mac, mac); + + poly1305_init(&total_ctx, total_key); + for (i = 0; i < 256; i++) { + /* set key and message to 'i,i,i..' */ + for (j = 0; j < sizeof(all_key); j++) + all_key[j] = i; + for (j = 0; j < i; j++) + all_msg[j] = i; + poly1305_auth(mac, all_msg, i, all_key); + poly1305_update(&total_ctx, mac, 16); + } + poly1305_finish(&total_ctx, mac); + result &= poly1305_verify(total_mac, mac); + + return result; +} diff --git a/tests/poly1305-donna/poly1305-donna.h b/tests/poly1305-donna/poly1305-donna.h new file mode 100644 index 0000000..94e2353 --- /dev/null +++ b/tests/poly1305-donna/poly1305-donna.h @@ -0,0 +1,20 @@ +#ifndef POLY1305_DONNA_H +#define POLY1305_DONNA_H + +#include + +typedef struct poly1305_context { + size_t aligner; + unsigned char opaque[136]; +} poly1305_context; + +void poly1305_init(poly1305_context *ctx, const unsigned char key[32]); +void poly1305_update(poly1305_context *ctx, const unsigned char *m, size_t bytes); +void poly1305_finish(poly1305_context *ctx, unsigned char mac[16]); +void poly1305_auth(unsigned char mac[16], const unsigned char *m, size_t bytes, const unsigned char key[32]); + +int poly1305_verify(const unsigned char mac1[16], const unsigned char mac2[16]); +int poly1305_power_on_self_test(void); + +#endif /* POLY1305_DONNA_H */ + diff --git a/tests/speed.c b/tests/speed.c index 7d638f3..de55cd4 100644 --- a/tests/speed.c +++ b/tests/speed.c @@ -6,6 +6,7 @@ #include "rename_monocypher.h" #include "rename_sha512.h" #include "tweetnacl/tweetnacl.h" +#include "poly1305-donna/poly1305-donna.h" #define FOR(i, start, end) for (size_t (i) = (start); (i) < (end); (i)++) typedef uint8_t u8; @@ -163,8 +164,8 @@ static speed_t argon2i(void) { size_t nb_blocks = SIZE / 1024; static u8 work_area[SIZE]; - static u8 password [ 16]; p_random(password, 32); - static u8 salt [ 16]; p_random(salt , 32); + static u8 password [ 16]; p_random(password, 16); + static u8 salt [ 16]; p_random(salt , 16); static u8 mono [ 32]; static u8 sodium [ 32]; @@ -416,6 +417,26 @@ static void t_ed25519(void) } } +static speed_t d_poly1305(void) +{ + static u8 in [SIZE]; p_random(in , SIZE); + static u8 key [ 32]; p_random(key , 32); + static u8 mono [ 16]; + static u8 sodium[ 16]; + + TIMING_START(monocypher) { + rename_poly1305_auth(mono, in, SIZE, key); + } + TIMING_END(monocypher); + TIMING_START(libsodium) { + poly1305_auth(sodium, in, SIZE, key); + } + TIMING_END(libsodium); + + TIMING_RESULT("Poly1305", 16); +} + + int main() { printf("\nComparing with Libsodium\n"); @@ -437,6 +458,10 @@ int main() print("x25519 ", t_x25519 (), "TweetNaCl"); t_ed25519 (); + printf("\nComparing with Donna\n"); + printf("----------------------\n"); + print("Poly1305 ", d_poly1305(), "32 bit Poly1305 Donna"); + printf("\n"); return 0; }