-// Taken from tweetNaCl
-
#include "poly1305.h"
-static void poly1305_add(uint32_t h[17], const uint8_t c[17])
+// Reads the 4 bytes s[0..3] as one little-endian 32-bit word
+// (s[0] is the least significant byte).
+static uint32_t load32_le(const uint8_t s[4])
{
- uint32_t u = 0;
- for (int j = 0; j < 17; j++) {
- u += h[j] + c[j];
- h[j] = u & 255;
- u >>= 8;
- }
+    // Widen every byte to uint32_t before shifting.  A uint8_t operand is
+    // promoted to (signed) int, so s[3] << 24 with s[3] >= 0x80 would shift
+    // into the sign bit, which is undefined behaviour.
+    return  (uint32_t)s[0]
+        | ((uint32_t)s[1] <<  8)
+        | ((uint32_t)s[2] << 16)
+        | ((uint32_t)s[3] << 24);
}
-static uint32_t poly1305_carry(uint32_t h[17], uint32_t carry)
+// Writes `input` into output[0..3] in little-endian byte order
+// (least significant byte first).
+static void store32_le(uint8_t output[4], uint32_t input)
{
- for (int i = 0; i < 16; i++) {
- carry += h[i];
- h[i] = carry & 255;
- carry >>= 8;
- }
- return carry + h[16];
+    for (int byte = 0; byte < 4; byte++) {
+        // the conversion to uint8_t keeps only the low 8 bits
+        output[byte] = (uint8_t)(input >> (8 * byte));
+    }
}
-static void poly1305_block(crypto_poly1305_ctx *ctx)
+// Loads 16 bytes as four little-endian 32-bit limbs:
+// out[i] is built from in[4*i .. 4*i + 3].
+static void poly_load(uint32_t out[4], const uint8_t in[16])
{
- poly1305_add(ctx->h, ctx->c);
- uint32_t x[17];
- for (int i = 0; i < 17; i++) {
- x[i] = 0;
- for (int j = 0 ; j < i + 1; j++)
- x[i] += ctx->h[j] * ctx->r[i - j];
- for (int j = i + 1; j < 17 ; j++)
- x[i] += ctx->h[j] * 320 * ctx->r[i + 17 - j];
- }
- for (int i = 0; i < 17; i++)
- ctx->h[i] = x[i];
+    for (int i = 0; i < 4; i++) {
+        out[i] = load32_le(in + i*4);
+    }
+}
- uint32_t u = poly1305_carry(ctx->h, 0 ); ctx->h[16] = u & 3;
- ctx->h[16] = poly1305_carry(ctx->h, 5 * (u >> 2));
+// out = a + b over five 32-bit limbs (least significant limb first).
+// The carry out of the top limb is dropped, so the sum is taken
+// modulo 2^160.  `out` may alias `a` or `b`: limb i of both inputs is
+// read before limb i of the output is written.
+static void poly_add(uint32_t out[5], const uint32_t a[5], const uint32_t b[5])
+{
+    uint64_t carry = 0;
+    for (int i = 0; i < 5; i++) {
+        // stay unsigned throughout: no signed type in the carry chain
+        carry += (uint64_t)a[i] + b[i];
+        out[i] = (uint32_t)(carry & 0xffffffff); // lower 32 bits of the sum
+        carry >>= 32;                            // what remains is the carry
+    }
}
+// h = (h + c) * r, partially reduced modulo 2^130 - 5 (reads ctx->c and ctx->r, overwrites ctx->h)
+static void poly_block(crypto_poly1305_ctx *ctx)
+{
+ // h + c, limb by limb in 64 bits, without carry propagation
+ const uint64_t h0 = ctx->h[0] + (uint64_t)ctx->c[0];
+ const uint64_t h1 = ctx->h[1] + (uint64_t)ctx->c[1];
+ const uint64_t h2 = ctx->h[2] + (uint64_t)ctx->c[2];
+ const uint64_t h3 = ctx->h[3] + (uint64_t)ctx->c[3];
+ const uint64_t h4 = ctx->h[4] + (uint64_t)ctx->c[4];
+
+ // 64-bit local copies of the limbs of r, plus rrN = 5 * (rN / 4)
+ const uint64_t r0 = ctx->r[0];
+ const uint64_t r1 = ctx->r[1];
+ const uint64_t r2 = ctx->r[2];
+ const uint64_t r3 = ctx->r[3];
+ const uint64_t rr0 = (ctx->r[0] >> 2) * 5; // loses r0's 2 bottom bits (recovered in x4 below)
+ const uint64_t rr1 = (ctx->r[1] >> 2) * 5; // exact: 2 bottom bits already cleared by init
+ const uint64_t rr2 = (ctx->r[2] >> 2) * 5; // exact: 2 bottom bits already cleared by init
+ const uint64_t rr3 = (ctx->r[3] >> 2) * 5; // exact: 2 bottom bits already cleared by init
+
+ // (h + c) * r, without carry propagation; the rr terms are the products of weight >= 2^128, folded back using 2^130 = 5 (mod 2^130 - 5)
+ const uint64_t x0 = h0*r0 + h1*rr3 + h2*rr2 + h3*rr1 + h4*rr0;
+ const uint64_t x1 = h0*r1 + h1*r0 + h2*rr3 + h3*rr2 + h4*rr1;
+ const uint64_t x2 = h0*r2 + h1*r1 + h2*r0 + h3*rr3 + h4*rr2;
+ const uint64_t x3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*rr3;
+ const uint64_t x4 = h4 * (r0 & 3); // ...recover the 2 bits of r0 that rr0 dropped
+
+ // carry propagation: whatever sits at or above bit 130 (msb >> 2) is multiplied by 5 and added back at the bottom
+ const uint64_t msb = x4 + (x3 >> 32);
+ uint64_t u = (msb >> 2) * 5; // lose 2 bottom bits...
+ u += (x0 & 0xffffffff) ; ctx->h[0] = u & 0xffffffff; u >>= 32;
+ u += (x1 & 0xffffffff) + (x0 >> 32); ctx->h[1] = u & 0xffffffff; u >>= 32;
+ u += (x2 & 0xffffffff) + (x1 >> 32); ctx->h[2] = u & 0xffffffff; u >>= 32;
+ u += (x3 & 0xffffffff) + (x2 >> 32); ctx->h[3] = u & 0xffffffff; u >>= 32;
+ u += msb & 3 /* ...recover them */ ; ctx->h[4] = u;
+}
-void
-crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32])
+// Empties the 16-byte input buffer (limbs c[0..3]) and rewinds the
+// input byte counter.  c[4] is left untouched.
+static void poly_clear_c(crypto_poly1305_ctx *ctx)
{
- for (int i = 0; i < 17; i++)
- ctx->h[i] = 0;
- for (int i = 0; i < 16; i++) {
- ctx->r [i] = key[i ];
- ctx->pad[i] = key[i + 16];
- }
- ctx->r[16] = 0;
- ctx->r[3] &= 0x0f; ctx->r[4] &= 0xfc;
- ctx->r[7] &= 0x0f; ctx->r[8] &= 0xfc;
- ctx->r[11] &= 0x0f; ctx->r[12] &= 0xfc;
- ctx->r[15] &= 0x0f;
- ctx->c[16] = 1;
+    ctx->c[0] = 0;
+    ctx->c[1] = 0;
+    ctx->c[2] = 0;
+    ctx->c[3] = 0;
ctx->c_index = 0;
}
-void
-crypto_poly1305_update(crypto_poly1305_ctx *ctx, const uint8_t *m, size_t bytes)
+// Sets up a fresh context from the 32-byte one-time key:
+// key[0..15] becomes the clamped multiplier r, key[16..31] the pad
+// that crypto_poly1305_finish() adds to the hash.
+void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32])
+{
+    // per-limb clamp for r: top 4 bits cleared in every limb,
+    // bottom 2 bits cleared in every limb but the first
+    static const uint32_t clamp[4] = {
+        0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc
+    };
+    // r: first half of the key, minus the clamped bits
+    poly_load(ctx->r, key);
+    for (int i = 0; i < 4; i++) {
+        ctx->r[i] &= clamp[i];
+    }
+    // pad: second half of the key, saved for later, top limb zero
+    poly_load(ctx->pad, key + 16);
+    ctx->pad[4] = 0;
+    // h: the accumulator starts at zero
+    for (int i = 0; i < 5; i++) {
+        ctx->h[i] = 0;
+    }
+    // c: top limb preset to 1, input buffer and byte counter cleared
+    ctx->c[4] = 1;
+    poly_clear_c(ctx);
+}
+
+// Absorbs `bytes` bytes of message, read from `m`, into the context.
+// Bytes are packed little-endian into the 16-byte buffer ctx->c.  A full
+// buffer is only hashed when one more byte arrives, so the last block of
+// the message is always still buffered when crypto_poly1305_finish() runs.
+void crypto_poly1305_update(crypto_poly1305_ctx *ctx,
+                            const uint8_t *m, size_t bytes)
{
while (bytes > 0) {
if (ctx->c_index == 16) {
- poly1305_block(ctx);
- ctx->c_index = 0;
+        poly_block(ctx);
+        poly_clear_c(ctx);
}
- ctx->c[ctx->c_index] = *m;
+    // feed the input buffer; the byte is widened to uint32_t first because
+    // a promoted (signed) int shifted left by 24 is undefined behaviour
+    // whenever *m >= 0x80
+    ctx->c[ctx->c_index / 4] |= (uint32_t)*m << ((ctx->c_index % 4) * 8);
ctx->c_index++;
m++;
bytes--;
}
}
-void
-crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16])
+// Hashes whatever input is still buffered, fully reduces the hash
+// modulo 2^130 - 5, adds the secret pad, and writes the 16-byte tag
+// to `mac`.  Call once, after the last crypto_poly1305_update().
+void crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16])
{
- // compute last block
- ctx->c[ctx->c_index] = 1;
- for (int i = ctx->c_index + 1; i < 17; i++)
- ctx->c[i] = 0;
- poly1305_block(ctx);
-
- // save h
- uint32_t g[17];
- for (int i = 0; i < 17; i++) g[i] = ctx->h[i];
-
- // finalise
- static const uint8_t minusp[17] = {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,252};
- poly1305_add(ctx->h, minusp);
- uint32_t s = -(ctx->h[16] >> 7);
- for (int i = 0; i < 17; i++) ctx->h[i] ^= s & (g[i] ^ ctx->h[i]);
- for (int i = 0; i < 16; i++) ctx->c[i] = ctx->pad[i];
- ctx->c[16] = 0;
- poly1305_add(ctx->h, ctx->c);
-
- // copy mac
- for (int i = 0; i < 16; i++) mac[i] = ctx->h[i];
+    // Process the last block, if there is one.  c_index is 0 only when no
+    // input was ever fed: an empty message has no block at all, h stays 0
+    // and the tag is just the pad (RFC 8439, section 2.5).
+    if (ctx->c_index != 0) {
+        // move the final 1 right after the buffered input
+        // (for a full block, c_index is 16 and the 1 lands back in c[4])
+        ctx->c[4] = 0;
+        ctx->c[ctx->c_index / 4] |= (uint32_t)1 << ((ctx->c_index % 4) * 8);
+        poly_block(ctx);
+    }
+    // Full modular reduction: we only need to conditionally subtract
+    // 2^130 - 5, using bit twiddling to prevent timing attacks.
+    // minus_p is -(2^130 - 5) modulo 2^160.
+    static const uint32_t minus_p[5] = { 5, 0, 0, 0, 0xfffffffc };
+    uint32_t h_minus_p[5];
+    poly_add(h_minus_p, ctx->h, minus_p);
+    // All ones when h - p is not negative (top bit clear), zero otherwise.
+    const uint32_t h_ge_p = (h_minus_p[4] >> 31) - 1;
+    for (int i = 0; i < 5; i++) {
+        // branch-free select: h = h_ge_p ? h - p : h
+        ctx->h[i] ^= h_ge_p & (ctx->h[i] ^ h_minus_p[i]);
+    }
+    // Add the secret pad to the final hash before output;
+    // only the low 128 bits make up the tag.
+    poly_add(ctx->h, ctx->h, ctx->pad);
+    for (int i = 0; i < 4; i++) {
+        store32_le(mac + i*4, ctx->h[i]);
+    }
}
-void
-crypto_poly1305_auth(uint8_t mac[16],
- const uint8_t *m,
- size_t m_size,
- const uint8_t key[32])
+// One-shot authentication: writes to `mac` the 16-byte tag of the
+// `m_size` bytes at `m` under the 32-byte one-time `key`.
+// Equivalent to init + a single update + finish on a local context.
+void crypto_poly1305_auth(uint8_t mac[16], const uint8_t *m,
+                          size_t m_size , const uint8_t key[32])
{
crypto_poly1305_ctx ctx;
crypto_poly1305_init (&ctx, key);
+    // without this call the tag would not depend on the message at all
+    crypto_poly1305_update(&ctx, m, m_size);
crypto_poly1305_finish(&ctx, mac);
}
-int
-crypto_poly1305_verify(const uint8_t mac1[16], const uint8_t mac2[16]) {
+int crypto_memcmp_16(const uint8_t mac1[16], const uint8_t mac2[16])
+{
unsigned diff = 0;
for (int i = 0; i < 16; i++) {
diff |= (mac1[i] ^ mac2[i]);
#include <stddef.h>
typedef struct {
- uint32_t r[17];
- uint32_t h[17];
- uint8_t c[17];
+ uint32_t r[4]; // first half of the key, clamped by init; 32-bit limbs, least significant first
+ uint32_t h[5]; // hash accumulator, zeroed by init and updated once per block
+ uint32_t c[5]; // input block: c[0..3] hold up to 16 message bytes (little-endian), c[4] the bit above them
+ uint32_t pad[5]; // second half of the key (pad[4] is 0), added to the hash by finish
size_t c_index; // number of message bytes currently buffered in c (0 to 16)
- uint8_t pad[16];
} crypto_poly1305_ctx;
+
// Initializes the poly1305 context with the secret key.
// Call first (obviously).
// WARNING: NEVER AUTHENTICATE 2 MESSAGES WITH THE SAME KEY.
// This is a ONE TIME authenticator. If you authenticate 2 messages
// with the same key, the attacker may deduce your secret key and
// authenticate messages in your stead.
-void
-crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32]);
+void crypto_poly1305_init(crypto_poly1305_ctx *ctx, const uint8_t key[32]);
// Updates the poly1305 context with a chunk of the message
// Can be called multiple times, once for each chunk.
// Make sure the chunks are processed in order, without overlaps or gaps.
-void
-crypto_poly1305_update(crypto_poly1305_ctx *ctx, const uint8_t *m, size_t bytes);
+void crypto_poly1305_update(crypto_poly1305_ctx *ctx,
+ const uint8_t *m, size_t bytes);
// Authenticate the message munched through previous update() calls.
// Call last (obviously).
-void
-crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16]);
+void crypto_poly1305_finish(crypto_poly1305_ctx *ctx, uint8_t mac[16]);
// Convenience all in one function
-void
-crypto_poly1305_auth(uint8_t mac[16],
- const uint8_t *m,
- size_t msg_length,
- const uint8_t key[32]);
+void crypto_poly1305_auth(uint8_t mac[16],
+ const uint8_t *m,
+ size_t msg_length,
+ const uint8_t key[32]);
// Constant time equality verification
// returns 0 if it matches, something else otherwise.
-int
-crypto_poly1305_verify(const uint8_t mac1[16], const uint8_t mac2[16]);
+int crypto_memcmp_16(const uint8_t mac1[16], const uint8_t mac2[16]);
#endif // POLY1305_H