From e8ff68ee619b3c8da12a7afe7a76ee0ec3f98c56 Mon Sep 17 00:00:00 2001 From: Loup Vaillant Date: Sun, 29 Jan 2017 22:58:06 +0100 Subject: [PATCH] home made poly1305 --- ae.c | 2 +- poly1305.c | 190 +++++++++++++++++++++++++++++++---------------------- poly1305.h | 31 ++++----- 3 files changed, 126 insertions(+), 97 deletions(-) diff --git a/ae.c b/ae.c index 2313e28..33d2731 100644 --- a/ae.c +++ b/ae.c @@ -35,7 +35,7 @@ crypto_ae_unlock_detached(const uint8_t key[32], uint8_t real_mac[16]; crypto_poly1305_auth(real_mac, ciphertext, text_size, auth_key); - if (crypto_poly1305_verify(real_mac, mac)) + if (crypto_memcmp_16(real_mac, mac)) return -1; crypto_chacha20_encrypt(&e_ctx, ciphertext, plaintext, text_size); diff --git a/poly1305.c b/poly1305.c index ca91526..c124977 100644 --- a/poly1305.c +++ b/poly1305.c @@ -1,110 +1,142 @@ -// Taken from tweetNaCl - #include "poly1305.h" -static void poly1305_add(uint32_t h[17], const uint8_t c[17]) +static uint32_t load32_le(const uint8_t s[4]) { - uint32_t u = 0; - for (int j = 0; j < 17; j++) { - u += h[j] + c[j]; - h[j] = u & 255; - u >>= 8; - } + return s[0] + | (s[1] << 8) + | (s[2] << 16) + | (s[3] << 24); } -static uint32_t poly1305_carry(uint32_t h[17], uint32_t carry) +static void store32_le(uint8_t output[4], uint32_t input) { - for (int i = 0; i < 16; i++) { - carry += h[i]; - h[i] = carry & 255; - carry >>= 8; - } - return carry + h[16]; + output[0] = input & 0xff; + output[1] = (input >> 8) & 0xff; + output[2] = (input >> 16) & 0xff; + output[3] = (input >> 24) & 0xff; } -static void poly1305_block(crypto_poly1305_ctx *ctx) +static void poly_load(uint32_t out[4], const uint8_t in[16]) { - poly1305_add(ctx->h, ctx->c); - uint32_t x[17]; - for (int i = 0; i < 17; i++) { - x[i] = 0; - for (int j = 0 ; j < i + 1; j++) - x[i] += ctx->h[j] * ctx->r[i - j]; - for (int j = i + 1; j < 17 ; j++) - x[i] += ctx->h[j] * 320 * ctx->r[i + 17 - j]; - } - for (int i = 0; i < 17; i++) - ctx->h[i] = x[i]; + for (int i = 0; i < 4; i++) + out[i] = load32_le(in + i*4); +} - uint32_t u = poly1305_carry(ctx->h, 0 ); ctx->h[16] = u & 3; - ctx->h[16] = poly1305_carry(ctx->h, 5 * (u >> 2)); +static void poly_add(uint32_t out[5], const uint32_t a[5], const uint32_t b[5]) +{ + uint64_t carry = 0; + for (int i = 0; i < 5; i++) { + carry += (int64_t)(a[i]) + b[i]; + out[i] = carry & 0xffffffff; // lower 32 bits right there. + carry >>= 32; // retain the carry + } } +// h = (h + c) * r +static void poly_block(crypto_poly1305_ctx *ctx) +{ + // h + c, without carry propagation + const uint64_t h0 = ctx->h[0] + (uint64_t)ctx->c[0]; + const uint64_t h1 = ctx->h[1] + (uint64_t)ctx->c[1]; + const uint64_t h2 = ctx->h[2] + (uint64_t)ctx->c[2]; + const uint64_t h3 = ctx->h[3] + (uint64_t)ctx->c[3]; + const uint64_t h4 = ctx->h[4] + (uint64_t)ctx->c[4]; + + // Local all the things! + const uint64_t r0 = ctx->r[0]; + const uint64_t r1 = ctx->r[1]; + const uint64_t r2 = ctx->r[2]; + const uint64_t r3 = ctx->r[3]; + const uint64_t rr0 = (ctx->r[0] >> 2) * 5; // lose 2 bottom bits... + const uint64_t rr1 = (ctx->r[1] >> 2) * 5; // 2 bottom bits already cleared + const uint64_t rr2 = (ctx->r[2] >> 2) * 5; // 2 bottom bits already cleared + const uint64_t rr3 = (ctx->r[3] >> 2) * 5; // 2 bottom bits already cleared + + // (h + c) * r, without carry propagation + const uint64_t x0 = h0*r0 + h1*rr3 + h2*rr2 + h3*rr1 + h4*rr0; + const uint64_t x1 = h0*r1 + h1*r0 + h2*rr3 + h3*rr2 + h4*rr1; + const uint64_t x2 = h0*r2 + h1*r1 + h2*r0 + h3*rr3 + h4*rr2; + const uint64_t x3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*rr3; + const uint64_t x4 = h4 * (r0 & 3); // ...recover those 2 bits + + // carry propagation, put ctx->h under 2^130 + const uint64_t msb = x4 + (x3 >> 32); + uint64_t u = (msb >> 2) * 5; // lose 2 bottom bits... + u += (x0 & 0xffffffff) ; ctx->h[0] = u & 0xffffffff; u >>= 32; + u += (x1 & 0xffffffff) + (x0 >> 32); ctx->h[1] = u & 0xffffffff; u >>= 32; + u += (x2 & 0xffffffff) + (x1 >> 32); ctx->h[2] = u & 0xffffffff; u >>= 32; + u += (x3 & 0xffffffff) + (x2 >> 32); ctx->h[3] = u & 0xffffffff; u >>= 32; + u += msb & 3 /* ...recover them */ ; ctx->h[4] = u; +} -void -crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32]) +// (re-)initializes the input counter and input buffer +static void poly_clear_c(crypto_poly1305_ctx *ctx) { - for (int i = 0; i < 17; i++) - ctx->h[i] = 0; - for (int i = 0; i < 16; i++) { - ctx->r [i] = key[i ]; - ctx->pad[i] = key[i + 16]; - } - ctx->r[16] = 0; - ctx->r[3] &= 0x0f; ctx->r[4] &= 0xfc; - ctx->r[7] &= 0x0f; ctx->r[8] &= 0xfc; - ctx->r[11] &= 0x0f; ctx->r[12] &= 0xfc; - ctx->r[15] &= 0x0f; - ctx->c[16] = 1; + for (int i = 0; i < 4; i++) + ctx->c[i] = 0; ctx->c_index = 0; } -void -crypto_poly1305_update(crypto_poly1305_ctx *ctx, const uint8_t *m, size_t bytes) +void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32]) +{ + // initial h: zero + for (int i = 0; i < 5; i++) + ctx->h [i] = 0; + // initial r: first half of the key, minus a few bits + poly_load(ctx->r, key); + ctx->r[0] &= 0x0fffffff; // clear top 4 bits + ctx->r[1] &= 0x0ffffffc; // clear top 4 & bottom 2 bits + ctx->r[2] &= 0x0ffffffc; // clear top 4 & bottom 2 bits + ctx->r[3] &= 0x0ffffffc; // clear top 4 & bottom 2 bits + ctx->c[4] = 1; + // second half of the key, saved for later + poly_load(ctx->pad, key + 16); + ctx->pad[4] = 0; + // buffer and counter + poly_clear_c(ctx); +} + +void crypto_poly1305_update(crypto_poly1305_ctx *ctx, + const uint8_t *m, size_t bytes) { while (bytes > 0) { if (ctx->c_index == 16) { - poly1305_block(ctx); - ctx->c_index = 0; + poly_block(ctx); + poly_clear_c(ctx); } - ctx->c[ctx->c_index] = *m; + // feed the input buffer + ctx->c[ctx->c_index / 4] |= *m << ((ctx->c_index % 4) * 8); ctx->c_index++; m++; bytes--; } } -void -crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16]) +void crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16]) { - // compute last block - ctx->c[ctx->c_index] = 1; - for (int i = ctx->c_index + 1; i < 17; i++) - ctx->c[i] = 0; - poly1305_block(ctx); - - // save h - uint32_t g[17]; - for (int i = 0; i < 17; i++) g[i] = ctx->h[i]; - - // finalise - static const uint8_t minusp[17] = {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,252}; - poly1305_add(ctx->h, minusp); - uint32_t s = -(ctx->h[16] >> 7); - for (int i = 0; i < 17; i++) ctx->h[i] ^= s & (g[i] ^ ctx->h[i]); - for (int i = 0; i < 16; i++) ctx->c[i] = ctx->pad[i]; - ctx->c[16] = 0; - poly1305_add(ctx->h, ctx->c); - - // copy mac - for (int i = 0; i < 16; i++) mac[i] = ctx->h[i]; + // move the final 1 according to remaining input length + ctx->c[4] = 0; + ctx->c[ctx->c_index / 4] |= 1 << ((ctx->c_index % 4) * 8); + // one last hash update... + poly_block(ctx); + // ... this time with full modular reduction + // We only need to conditionally subtract 2^130-5, + // using bit twidling to prevent timing attacks. + static const uint32_t minus_p[5] = { 5, 0, 0, 0, 0xfffffffc }; + uint32_t h_minus_p[5]; + poly_add(h_minus_p, ctx->h, minus_p); + uint32_t negative = ~(-(h_minus_p[4] >> 31)); // 0 or -1 (2's complement) + for (int i = 0; i < 5; i++) { + ctx->h[i] ^= negative & (ctx->h[i] ^ h_minus_p[i]); + } + // Add the secret pad to the final hash before output + poly_add(ctx->h, ctx->h, ctx->pad); + for (int i = 0; i < 4; i++) + store32_le(mac + i*4, ctx->h[i]); } -void -crypto_poly1305_auth(uint8_t mac[16], - const uint8_t *m, - size_t m_size, - const uint8_t key[32]) +void crypto_poly1305_auth(uint8_t mac[16], const uint8_t *m, + size_t m_size , const uint8_t key[32]) { crypto_poly1305_ctx ctx; crypto_poly1305_init (&ctx, key); @@ -112,8 +144,8 @@ crypto_poly1305_auth(uint8_t mac[16], crypto_poly1305_finish(&ctx, mac); } -int -crypto_poly1305_verify(const uint8_t mac1[16], const uint8_t mac2[16]) { +int crypto_memcmp_16(const uint8_t mac1[16], const uint8_t mac2[16]) +{ unsigned diff = 0; for (int i = 0; i < 16; i++) { diff |= (mac1[i] ^ mac2[i]); diff --git a/poly1305.h b/poly1305.h index 1b0bdaa..ce3571e 100644 --- a/poly1305.h +++ b/poly1305.h @@ -5,44 +5,41 @@ #include typedef struct { - uint32_t r[17]; - uint32_t h[17]; - uint8_t c[17]; + uint32_t r[4]; + uint32_t h[5]; + uint32_t c[5]; + uint32_t pad[5]; size_t c_index; - uint8_t pad[16]; } crypto_poly1305_ctx; + // Initializes the poly1305 context with the secret key. // Call first (obviously). // WARNING: NEVER AUTHENTICATE 2 MESSAGES WITH THE SAME KEY. // This is a ONE TIME authenticator. If you authenticate 2 messages // with the same key, the attacker may deduce your secret key and // authenticate messages in your stead. -void -crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32]); +void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32]); // Updates the poly1305 context with a chunk of the message // Can be called multiple times, once for each chunk. // Make sure the chunks are processed in order, without overlap or hole... -void -crypto_poly1305_update(crypto_poly1305_ctx *ctx, const uint8_t *m, size_t bytes); +void crypto_poly1305_update(crypto_poly1305_ctx *ctx, + const uint8_t *m, size_t bytes); // Authenticate the message munched through previous update() calls. // Call last (obviously). -void -crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16]); +void crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16]); // Convenience all in one function -void -crypto_poly1305_auth(uint8_t mac[16], - const uint8_t *m, - size_t msg_length, - const uint8_t key[32]); +void crypto_poly1305_auth(uint8_t mac[16], + const uint8_t *m, + size_t msg_length, + const uint8_t key[32]); // Constant time equality verification // returns 0 if it matches, something else otherwise. -int -crypto_poly1305_verify(const uint8_t mac1[16], const uint8_t mac2[16]); +int crypto_memcmp_16(const uint8_t mac1[16], const uint8_t mac2[16]); #endif // POLY1305_H -- 2.47.3