From 0381b36ac7c4de0e286fb7d71c5ead81aee204a9 Mon Sep 17 00:00:00 2001 From: Chris Duncan Date: Fri, 27 Feb 2026 00:03:35 -0800 Subject: [PATCH] Pointers, vectors, and doubling. Use pointers for field element constants and simplify scalarbase copy setup. Implement point doubling function. Vectorize selection logic and use built-in for swapping bits. Tweak error handling, maybe fix liftString at some point. Use immediate offsets instead of pointer arithmetic where possible. Pass pointers directly to scalarmult, add, and cswap. --- assembly/nano-nacl.ts | 633 ++++++++++++++++++++++++------------------ index.ts | 9 +- 2 files changed, 371 insertions(+), 271 deletions(-) diff --git a/assembly/nano-nacl.ts b/assembly/nano-nacl.ts index 87b811a..1b816d0 100644 --- a/assembly/nano-nacl.ts +++ b/assembly/nano-nacl.ts @@ -189,12 +189,14 @@ const PUBLICKEY_BYTES: i32 = 32 const SECRETKEY_BYTES: i32 = PRIVATEKEY_BYTES + PUBLICKEY_BYTES const SIGNATURE_BYTES: i32 = 64 const SIGNEDBLOCKHASH_BYTES: i32 = SIGNATURE_BYTES + BLOCKHASH_BYTES -const D: StaticArray = StaticArray.fromArray([0x78a3, 0x1359, 0x4dca, 0x75eb, 0xd8ab, 0x4141, 0x0a4d, 0x0070, 0xe898, 0x7779, 0x4079, 0x8cc7, 0xfe73, 0x2b6f, 0x6cee, 0x5203]) -const D2: StaticArray = StaticArray.fromArray([0xf159, 0x26b2, 0x9b94, 0xebd6, 0xb156, 0x8283, 0x149a, 0x00e0, 0xd130, 0xeef3, 0x80f2, 0x198e, 0xfce7, 0x56df, 0xd9dc, 0x2406]) -const X: StaticArray = StaticArray.fromArray([0xd51a, 0x8f25, 0x2d60, 0xc956, 0xa7b2, 0x9525, 0xc760, 0x692c, 0xdc5c, 0xfdd6, 0xe231, 0xc0a4, 0x53fe, 0xcd6e, 0x36d3, 0x2169]) -const Y: StaticArray = StaticArray.fromArray([0x6658, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666]) -const I: StaticArray = StaticArray.fromArray([0xa0b0, 0x4a0e, 0x1b27, 0xc4ee, 0xe478, 0xad2f, 0x1806, 0x2f43, 0xd7a7, 0x3dfb, 0x0099, 0x2b4d, 0xdf0b, 0x4fc1, 0x2480, 0x2b83]) -const XY: StaticArray = StaticArray.fromArray([0xdd90, 0xa5b7, 0x8ab3, 0x6dde, 0x52f5, 0x7751, 0x9f80, 0x20f0, 0xe37d, 0x64ab, 0x4e8e, 0x66ea, 0x7665, 0xd78b, 0x5f0f, 0xe787]) + +const ZERO = changetype(new StaticArray(16)) +const D = changetype(StaticArray.fromArray([0x78a3, 0x1359, 0x4dca, 0x75eb, 0xd8ab, 0x4141, 0x0a4d, 0x0070, 0xe898, 0x7779, 0x4079, 0x8cc7, 0xfe73, 0x2b6f, 0x6cee, 0x5203])) +const D2 = changetype(StaticArray.fromArray([0xf159, 0x26b2, 0x9b94, 0xebd6, 0xb156, 0x8283, 0x149a, 0x00e0, 0xd130, 0xeef3, 0x80f2, 0x198e, 0xfce7, 0x56df, 0xd9dc, 0x2406])) +const X = changetype(StaticArray.fromArray([0xd51a, 0x8f25, 0x2d60, 0xc956, 0xa7b2, 0x9525, 0xc760, 0x692c, 0xdc5c, 0xfdd6, 0xe231, 0xc0a4, 0x53fe, 0xcd6e, 0x36d3, 0x2169])) +const Y = changetype(StaticArray.fromArray([0x6658, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666])) +const I = changetype(StaticArray.fromArray([0xa0b0, 0x4a0e, 0x1b27, 0xc4ee, 0xe478, 0xad2f, 0x1806, 0x2f43, 0xd7a7, 0x3dfb, 0x0099, 0x2b4d, 0xdf0b, 0x4fc1, 0x2480, 0x2b83])) +const XY = changetype(StaticArray.fromArray([0xdd90, 0xa5b7, 0x8ab3, 0x6dde, 0x52f5, 0x7751, 0x9f80, 0x20f0, 0xe37d, 0x64ab, 0x4e8e, 0x66ea, 0x7665, 0xd78b, 0x5f0f, 0xe787])) // Static I/O buffers const INPUT_BUFFER = memory.data(128) @@ -208,11 +210,12 @@ function vn (x: StaticArray, y: StaticArray): bool { return d !== 0 } +// o, a = StaticArray[16] const pow_c: StaticArray = new StaticArray(16) -function pow2523 (o: StaticArray, a: StaticArray): void { - const c = pow_c +function pow2523 (o: usize, a: usize): void { + const c = changetype(pow_c) for (let i = 0; i < 16; i++) { - c[i] = a[i] + store(c + (i << 2), load(a + (i << 2))) } for (let i = 0; i < 249; i++) { Square(c, c) @@ -222,7 +225,7 @@ function pow2523 (o: StaticArray, a: StaticArray): void { Square(c, c) Multiply(c, c, a) for (let i = 0; i < 16; i++) { - o[i] = c[i] + store(o + (i << 2), load(c + (i << 2))) } } @@ -236,11 +239,12 @@ function car25519 (o: StaticArray): void { o[0] += 38 * c } +// a = StaticArray[16] const inv_c: StaticArray = new StaticArray(16) -function inv25519 (o: StaticArray, a: StaticArray): void { - const c = inv_c +function inv25519 (o: StaticArray, a: usize): void { + const c = changetype(inv_c) for (let i = 0; i < 16; i++) { - c[i] = a[i] + store(c + (i << 2), load(a + (i << 2))) } for (let i = 0; i < 249; i++) { Square(c, c) @@ -255,10 +259,11 @@ function inv25519 (o: StaticArray, a: StaticArray): void { Square(c, c) Multiply(c, c, a) for (let i = 0; i < 16; i++) { - o[i] = c[i] + o[i] = load(c + (i << 2)) } } +// a, b = StaticArray[16] const neq_c = new StaticArray(32) const neq_d = new StaticArray(32) function neq25519 (a: StaticArray, b: StaticArray): bool { @@ -293,7 +298,7 @@ function pack25519 (o: StaticArray, n: StaticArray): void { m[15] = t[15] - 0x7fff - ((m[14] >> 16) & 1) b = (m[15] >> 16) & 1 m[14] &= 0xffff - sel25519(t, m, 1 - b) + sel25519(changetype(t), changetype(m), 1 - b) } for (let i = 0; i < 16; i++) { @@ -303,76 +308,92 @@ function pack25519 (o: StaticArray, n: StaticArray): void { } const par_d = new StaticArray(32) -function par25519 (a: StaticArray): u8 { +function par25519 (a: usize): u8 { const d = par_d - pack25519(d, a) + pack25519(d, changetype>(a)) return d[0] & 1 } -function sel25519 (p: StaticArray, q: StaticArray, b: i32): void { - let t: i32 = 0 - const c: i32 = ~(b - 1) - for (let i = 0; i < 16; i++) { - t = c & (p[i] ^ q[i]) - p[i] ^= t - q[i] ^= t - } +// p, q = StaticArray[16] +@inline +function sel25519 (p: usize, q: usize, b: i32): void { + const c = v128.splat(0 - b) + + let pi = v128.load(p) + let qi = v128.load(q) + v128.store(p, v128.bitselect(qi, pi, c)) + v128.store(q, v128.bitselect(pi, qi, c)) + + pi = v128.load(p, 16) + qi = v128.load(q, 16) + v128.store(p, v128.bitselect(qi, pi, c), 16) + v128.store(q, v128.bitselect(pi, qi, c), 16) + + pi = v128.load(p, 32) + qi = v128.load(q, 32) + v128.store(p, v128.bitselect(qi, pi, c), 32) + v128.store(q, v128.bitselect(pi, qi, c), 32) + + pi = v128.load(p, 48) + qi = v128.load(q, 48) + v128.store(p, v128.bitselect(qi, pi, c), 48) + v128.store(q, v128.bitselect(pi, qi, c), 48) } -function unpack25519 (o: StaticArray, n: StaticArray): void { +function unpack25519 (oo: usize, n: StaticArray): void { + const o = changetype>(oo) for (let i = 0; i < 16; i++) { o[i] = i32(n[i << 1]) + (i32(n[(i << 1) + 1]) << 8) } o[15] &= (1 << 15) - 1 } +// o, a, b = StaticArray[16] @inline -function Add (o: StaticArray, a: StaticArray, b: StaticArray): void { - o[0] = a[0] + b[0] - o[1] = a[1] + b[1] - o[2] = a[2] + b[2] - o[3] = a[3] + b[3] - o[4] = a[4] + b[4] - o[5] = a[5] + b[5] - o[6] = a[6] + b[6] - o[7] = a[7] + b[7] - o[8] = a[8] + b[8] - o[9] = a[9] + b[9] - o[10] = a[10] + b[10] - o[11] = a[11] + b[11] - o[12] = a[12] + b[12] - o[13] = a[13] + b[13] - o[14] = a[14] + b[14] - o[15] = a[15] + b[15] +function Add (o: usize, a: usize, b: usize): void { + store(o, load(a) + load(b)) + store(o, load(a, 4) + load(b, 4), 4) + store(o, load(a, 8) + load(b, 8), 8) + store(o, load(a, 12) + load(b, 12), 12) + store(o, load(a, 16) + load(b, 16), 16) + store(o, load(a, 20) + load(b, 20), 20) + store(o, load(a, 24) + load(b, 24), 24) + store(o, load(a, 28) + load(b, 28), 28) + store(o, load(a, 32) + load(b, 32), 32) + store(o, load(a, 36) + load(b, 36), 36) + store(o, load(a, 40) + load(b, 40), 40) + store(o, load(a, 44) + load(b, 44), 44) + store(o, load(a, 48) + load(b, 48), 48) + store(o, load(a, 52) + load(b, 52), 52) + store(o, load(a, 56) + load(b, 56), 56) + store(o, load(a, 60) + load(b, 60), 60) } +// o, a, b = StaticArray[16] @inline -function Subtract (o: StaticArray, a: StaticArray, b: StaticArray): void { - o[0] = a[0] - b[0] - o[1] = a[1] - b[1] - o[2] = a[2] - b[2] - o[3] = a[3] - b[3] - o[4] = a[4] - b[4] - o[5] = a[5] - b[5] - o[6] = a[6] - b[6] - o[7] = a[7] - b[7] - o[8] = a[8] - b[8] - o[9] = a[9] - b[9] - o[10] = a[10] - b[10] - o[11] = a[11] - b[11] - o[12] = a[12] - b[12] - o[13] = a[13] - b[13] - o[14] = a[14] - b[14] - o[15] = a[15] - b[15] +function Subtract (o: usize, a: usize, b: usize): void { + store(o, load(a) - load(b)) + store(o, load(a, 4) - load(b, 4), 4) + store(o, load(a, 8) - load(b, 8), 8) + store(o, load(a, 12) - load(b, 12), 12) + store(o, load(a, 16) - load(b, 16), 16) + store(o, load(a, 20) - load(b, 20), 20) + store(o, load(a, 24) - load(b, 24), 24) + store(o, load(a, 28) - load(b, 28), 28) + store(o, load(a, 32) - load(b, 32), 32) + store(o, load(a, 36) - load(b, 36), 36) + store(o, load(a, 40) - load(b, 40), 40) + store(o, load(a, 44) - load(b, 44), 44) + store(o, load(a, 48) - load(b, 48), 48) + store(o, load(a, 52) - load(b, 52), 52) + store(o, load(a, 56) - load(b, 56), 56) + store(o, load(a, 60) - load(b, 60), 60) } +// o, a, b = StaticArray[16] const multiply_t = new StaticArray(32) -function Multiply (oo: StaticArray, aa: StaticArray, bb: StaticArray): void { - const a = changetype(aa) - const b = changetype(bb) - const o = changetype(oo) +function Multiply (o: usize, a: usize, b: usize): void { const t = changetype(multiply_t.fill(0)) - const b0 = v128.load(b) const b4 = v128.load(b + 16) const b8 = v128.load(b + 32) @@ -425,7 +446,8 @@ function Multiply (oo: StaticArray, aa: StaticArray, bb: StaticArray, a: StaticArray): void { +// o, a = StaticArray[16] +function Square (o: usize, a: usize): void { Multiply(o, a, a) } @@ -444,128 +466,128 @@ function Square (o: StaticArray, a: StaticArray): void { function Normalize (o: usize, t: usize): void { // reduce let x = load(t) - let y = load(t + 128) + let y = load(t, 128) store(t, x + (38 * y)) - x = load(t + 8) - y = load(t + 136) - store(t + 8, x + (38 * y)) + x = load(t, 8) + y = load(t, 136) + store(t, x + (38 * y), 8) - x = load(t + 16) - y = load(t + 144) - store(t + 16, x + (38 * y)) + x = load(t, 16) + y = load(t, 144) + store(t, x + (38 * y), 16) - x = load(t + 24) - y = load(t + 152) - store(t + 24, x + (38 * y)) + x = load(t, 24) + y = load(t, 152) + store(t, x + (38 * y), 24) - x = load(t + 32) - y = load(t + 160) - store(t + 32, x + (38 * y)) + x = load(t, 32) + y = load(t, 160) + store(t, x + (38 * y), 32) - x = load(t + 40) - y = load(t + 168) - store(t + 40, x + (38 * y)) + x = load(t, 40) + y = load(t, 168) + store(t, x + (38 * y), 40) - x = load(t + 48) - y = load(t + 176) - store(t + 48, x + (38 * y)) + x = load(t, 48) + y = load(t, 176) + store(t, x + (38 * y), 48) - x = load(t + 56) - y = load(t + 184) - store(t + 56, x + (38 * y)) + x = load(t, 56) + y = load(t, 184) + store(t, x + (38 * y), 56) - x = load(t + 64) - y = load(t + 192) - store(t + 64, x + (38 * y)) + x = load(t, 64) + y = load(t, 192) + store(t, x + (38 * y), 64) - x = load(t + 72) - y = load(t + 200) - store(t + 72, x + (38 * y)) + x = load(t, 72) + y = load(t, 200) + store(t, x + (38 * y), 72) - x = load(t + 80) - y = load(t + 208) - store(t + 80, x + (38 * y)) + x = load(t, 80) + y = load(t, 208) + store(t, x + (38 * y), 80) - x = load(t + 88) - y = load(t + 216) - store(t + 88, x + (38 * y)) + x = load(t, 88) + y = load(t, 216) + store(t, x + (38 * y), 88) - x = load(t + 96) - y = load(t + 224) - store(t + 96, x + (38 * y)) + x = load(t, 96) + y = load(t, 224) + store(t, x + (38 * y), 96) - x = load(t + 104) - y = load(t + 232) - store(t + 104, x + (38 * y)) + x = load(t, 104) + y = load(t, 232) + store(t, x + (38 * y), 104) - x = load(t + 112) - y = load(t + 240) - store(t + 112, x + (38 * y)) + x = load(t, 112) + y = load(t, 240) + store(t, x + (38 * y), 112) // first carry let c: i64 = load(t) store(t, c & 0xFFFF) c >>= 16 - c += load(t + 8) - store(t + 8, c & 0xFFFF) + c += load(t, 8) + store(t, c & 0xFFFF, 8) c >>= 16 - c += load(t + 16) - store(t + 16, c & 0xFFFF) + c += load(t, 16) + store(t, c & 0xFFFF, 16) c >>= 16 - c += load(t + 24) - store(t + 24, c & 0xFFFF) + c += load(t, 24) + store(t, c & 0xFFFF, 24) c >>= 16 - c += load(t + 32) - store(t + 32, c & 0xFFFF) + c += load(t, 32) + store(t, c & 0xFFFF, 32) c >>= 16 - c += load(t + 40) - store(t + 40, c & 0xFFFF) + c += load(t, 40) + store(t, c & 0xFFFF, 40) c >>= 16 - c += load(t + 48) - store(t + 48, c & 0xFFFF) + c += load(t, 48) + store(t, c & 0xFFFF, 48) c >>= 16 - c += load(t + 56) - store(t + 56, c & 0xFFFF) + c += load(t, 56) + store(t, c & 0xFFFF, 56) c >>= 16 - c += load(t + 64) - store(t + 64, c & 0xFFFF) + c += load(t, 64) + store(t, c & 0xFFFF, 64) c >>= 16 - c += load(t + 72) - store(t + 72, c & 0xFFFF) + c += load(t, 72) + store(t, c & 0xFFFF, 72) c >>= 16 - c += load(t + 80) - store(t + 80, c & 0xFFFF) + c += load(t, 80) + store(t, c & 0xFFFF, 80) c >>= 16 - c += load(t + 88) - store(t + 88, c & 0xFFFF) + c += load(t, 88) + store(t, c & 0xFFFF, 88) c >>= 16 - c += load(t + 96) - store(t + 96, c & 0xFFFF) + c += load(t, 96) + store(t, c & 0xFFFF, 96) c >>= 16 - c += load(t + 104) - store(t + 104, c & 0xFFFF) + c += load(t, 104) + store(t, c & 0xFFFF, 104) c >>= 16 - c += load(t + 112) - store(t + 112, c & 0xFFFF) + c += load(t, 112) + store(t, c & 0xFFFF, 112) c >>= 16 - c += load(t + 120) - store(t + 120, c & 0xFFFF) + c += load(t, 120) + store(t, c & 0xFFFF, 120) c >>= 16 store(t, load(t) + (38 * c)) @@ -575,146 +597,215 @@ function Normalize (o: usize, t: usize): void { store(o, c & 0xFFFF) c >>= 16 - c += load(t + 8) - store(o + 4, c & 0xFFFF) + c += load(t, 8) + store(o, c & 0xFFFF, 4) c >>= 16 - c += load(t + 16) - store(o + 8, c & 0xFFFF) + c += load(t, 16) + store(o, c & 0xFFFF, 8) c >>= 16 - c += load(t + 24) - store(o + 12, c & 0xFFFF) + c += load(t, 24) + store(o, c & 0xFFFF, 12) c >>= 16 - c += load(t + 32) - store(o + 16, c & 0xFFFF) + c += load(t, 32) + store(o, c & 0xFFFF, 16) c >>= 16 - c += load(t + 40) - store(o + 20, c & 0xFFFF) + c += load(t, 40) + store(o, c & 0xFFFF, 20) c >>= 16 - c += load(t + 48) - store(o + 24, c & 0xFFFF) + c += load(t, 48) + store(o, c & 0xFFFF, 24) c >>= 16 - c += load(t + 56) - store(o + 28, c & 0xFFFF) + c += load(t, 56) + store(o, c & 0xFFFF, 28) c >>= 16 - c += load(t + 64) - store(o + 32, c & 0xFFFF) + c += load(t, 64) + store(o, c & 0xFFFF, 32) c >>= 16 - c += load(t + 72) - store(o + 36, c & 0xFFFF) + c += load(t, 72) + store(o, c & 0xFFFF, 36) c >>= 16 - c += load(t + 80) - store(o + 40, c & 0xFFFF) + c += load(t, 80) + store(o, c & 0xFFFF, 40) c >>= 16 - c += load(t + 88) - store(o + 44, c & 0xFFFF) + c += load(t, 88) + store(o, c & 0xFFFF, 44) c >>= 16 - c += load(t + 96) - store(o + 48, c & 0xFFFF) + c += load(t, 96) + store(o, c & 0xFFFF, 48) c >>= 16 - c += load(t + 104) - store(o + 52, c & 0xFFFF) + c += load(t, 104) + store(o, c & 0xFFFF, 52) c >>= 16 - c += load(t + 112) - store(o + 56, c & 0xFFFF) + c += load(t, 112) + store(o, c & 0xFFFF, 56) c >>= 16 - c += load(t + 120) - store(o + 60, c & 0xFFFF) + c += load(t, 120) + store(o, c & 0xFFFF, 60) c >>= 16 store(o, load(o) + (38 * c)) } -const a: StaticArray = new StaticArray(16) -const b: StaticArray = new StaticArray(16) -const c: StaticArray = new StaticArray(16) -const d: StaticArray = new StaticArray(16) -const e: StaticArray = new StaticArray(16) -const f: StaticArray = new StaticArray(16) -const g: StaticArray = new StaticArray(16) -const h: StaticArray = new StaticArray(16) -const t: StaticArray = new StaticArray(16) -function add (p: StaticArray[], q: StaticArray[]): void { - Subtract(a, p[1], p[0]) - Subtract(t, q[1], q[0]) +// p, q = StaticArray[4][16] +const add_a: StaticArray = new StaticArray(16) +const add_b: StaticArray = new StaticArray(16) +const add_c: StaticArray = new StaticArray(16) +const add_d: StaticArray = new StaticArray(16) +const add_e: StaticArray = new StaticArray(16) +const add_f: StaticArray = new StaticArray(16) +const add_g: StaticArray = new StaticArray(16) +const add_h: StaticArray = new StaticArray(16) +const add_t: StaticArray = new StaticArray(16) +function add (p: usize, q: usize): void { + const a = changetype(add_a) + const b = changetype(add_b) + const c = changetype(add_c) + const d = changetype(add_d) + const e = changetype(add_e) + const f = changetype(add_f) + const g = changetype(add_g) + const h = changetype(add_h) + const t = changetype(add_t) + + const px = p + const py = p + 64 + const pz = p + 128 + const pt = p + 192 + + const qx = q + const qy = q + 64 + const qz = q + 128 + const qt = q + 192 + + Subtract(a, py, px) + Subtract(t, qy, qx) Multiply(a, a, t) - Add(b, p[0], p[1]) - Add(t, q[0], q[1]) + Add(b, px, py) + Add(t, qx, qy) Multiply(b, b, t) - Multiply(c, p[3], q[3]) + Multiply(c, pt, qt) Multiply(c, c, D2) - Multiply(d, p[2], q[2]) + Multiply(d, pz, qz) Add(d, d, d) Subtract(e, b, a) Subtract(f, d, c) Add(g, d, c) Add(h, b, a) - Multiply(p[0], e, f) - Multiply(p[1], h, g) - Multiply(p[2], g, f) - Multiply(p[3], e, h) + Multiply(px, e, f) + Multiply(py, h, g) + Multiply(pz, g, f) + Multiply(pt, e, h) } -function cswap (p: StaticArray[], q: StaticArray[], b: i32): void { - for (let i = 0; i < 4; i++) { - sel25519(p[i], q[i], b) - } +/** + * A = X^2, B = Y^2, C = 2*Z^2, D = -A (in mod p, Negate is p - A; tweetnacl typically does 0 - A) + * E = (X+Y)^2 - A - B + * G = D + B, F = G - C, H = D - B + * X3 = E*F, Y3 = G*H, Z3 = F*G, T3 = E*H + */ +function double (p: usize): void { + const a = changetype(add_a) + const b = changetype(add_b) + const c = changetype(add_c) + const d = changetype(add_d) + const e = changetype(add_e) + const f = changetype(add_f) + const g = changetype(add_g) + const h = changetype(add_h) + + const px = p + const py = p + 64 + const pz = p + 128 + const pt = p + 192 + + Square(a, px) + Square(b, py) + Square(c, pz) + Add(c, c, c) + Subtract(d, ZERO, a) + + Add(e, px, py) + Square(e, e) + Subtract(e, e, a) + Subtract(e, e, b) + + Add(g, d, b) + Subtract(f, g, c) + Subtract(h, d, b) + + Multiply(px, e, f) + Multiply(py, g, h) + Multiply(pz, f, g) + Multiply(pt, e, h) } +// p, q = StaticArray[16] +// offsets i32x16 = 64 bytes +@inline +function cswap (p: usize, q: usize, b: i32): void { + sel25519(p, q, b) + sel25519(p + 64, q + 64, b) + sel25519(p + 128, q + 128, b) + sel25519(p + 192, q + 192, b) +} + +// p = StaticArray[4][16] const tx: StaticArray = new StaticArray(16) const ty: StaticArray = new StaticArray(16) const zi: StaticArray = new StaticArray(16) -function pack (r: StaticArray, p: StaticArray[]): void { - inv25519(zi, p[2]) - Multiply(tx, p[0], zi) - Multiply(ty, p[1], zi) +function pack (r: StaticArray, p: usize): void { + inv25519(zi, p + 128) + Multiply(changetype(tx), p, changetype(zi)) + Multiply(changetype(ty), p + 64, changetype(zi)) pack25519(r, ty) - r[31] ^= par25519(tx) << 7 + r[31] ^= par25519(changetype(tx)) << 7 } -function scalarmult (p: StaticArray[], q: StaticArray[], s: StaticArray): void { +// p, q = StaticArray[4][16] +function scalarmult (p: usize, q: usize, s: StaticArray): void { for (let i = 255; i >= 0; i--) { const b: i32 = (s[i >> 3] >> u8(i & 7)) & 1 cswap(p, q, b) add(q, p) - add(p, p) + double(p) cswap(p, q, b) } } -const scalarbase_q: StaticArray[] = [new StaticArray(16), new StaticArray(16), new StaticArray(16), new StaticArray(16)] -function scalarbase (p: StaticArray[], s: StaticArray): void { - const q = scalarbase_q - for (let i = 0; i < 16; i++) { - q[0][i] = X[i] - q[1][i] = Y[i] - q[2][i] = 0 - q[3][i] = XY[i] - } - q[2][0] = 1 +// p = StaticArray[4][16] +const scalarbase_q = new StaticArray(64) +function scalarbase (p: usize, s: StaticArray): void { + const q = changetype(scalarbase_q) + memory.copy(q, X, 64) + memory.copy(q + 64, Y, 64) + memory.fill(q + 128, 0, 64) + memory.copy(q + 192, XY, 64) + store(q, 1, 128) scalarmult(p, q, s) } -const L: StaticArray = StaticArray.fromArray([ +// Group order of the curve ≈ 2²⁵² +const L: usize = changetype(StaticArray.fromArray([ 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 -]) +])) function modL (r: StaticArray, x: StaticArray): void { let c: i32 @@ -724,7 +815,7 @@ function modL (r: StaticArray, x: StaticArray): void { c = 0 const xi = x[i] for (let j = i - 32, k = i - 12; j < k; j++) { - t = (xi * L[j - (i - 32)]) << 4 + t = (xi * load(L + ((j - (i - 32)) << 2))) << 4 v = x[j] + c - t c = (v + 128) >> 8 x[j] = v - (c << 8) @@ -735,12 +826,12 @@ function modL (r: StaticArray, x: StaticArray): void { c = 0 const x31 = x[31] for (let j = 0; j < 32; j++) { - x[j] += c - (x31 >> 4) * L[j] + x[j] += c - (x31 >> 4) * load(L + (j << 2)) c = x[j] >> 8 x[j] &= 255 } for (let j = 0; j < 32; j++) { - x[j] -= c * L[j] + x[j] -= c * load(L + (j << 2)) } for (let i = 0; i < 32; i++) { x[i + 1] += x[i] >> 8 @@ -748,8 +839,9 @@ function modL (r: StaticArray, x: StaticArray): void { } } -const x = new StaticArray(64) +const reduce_x = new StaticArray(64) function reduce (r: StaticArray): void { + const x = reduce_x for (let i = 0; i < 64; i++) { x[i] = i32(r[i]) r[i] = 0 @@ -757,22 +849,28 @@ function reduce (r: StaticArray): void { modL(r, x) } +// r = StaticArray[4][16] +const unpack_chk: StaticArray = new StaticArray(16) +const unpack_num: StaticArray = new StaticArray(16) +const unpack_den: StaticArray = new StaticArray(16) +const unpack_den2: StaticArray = new StaticArray(16) +const unpack_den4: StaticArray = new StaticArray(16) +const unpack_den6: StaticArray = new StaticArray(16) const unpack_t: StaticArray = new StaticArray(16) -const chk: StaticArray = new StaticArray(16) -const num: StaticArray = new StaticArray(16) -const den: StaticArray = new StaticArray(16) -const den2: StaticArray = new StaticArray(16) -const den4: StaticArray = new StaticArray(16) -const den6: StaticArray = new StaticArray(16) -const z: StaticArray = new StaticArray(16) -function unpackneg (r: Array>, p: StaticArray): i8 { - const t = unpack_t - - unpack25519(r[1], p) - Square(num, r[1]) +function unpackneg (r: usize, p: StaticArray): i8 { + const chk = changetype(unpack_chk) + const num = changetype(unpack_num) + const den = changetype(unpack_den) + const den2 = changetype(unpack_den2) + const den4 = changetype(unpack_den4) + const den6 = changetype(unpack_den6) + const t = changetype(unpack_t) + + unpack25519(r + 64, p) + Square(num, r + 64) Multiply(den, num, D) - Subtract(num, num, r[2]) - Add(den, r[2], den) + Subtract(num, num, r + 128) + Add(den, r + 128, den) Square(den2, den) Square(den4, den2) @@ -784,37 +882,37 @@ function unpackneg (r: Array>, p: StaticArray): i8 { Multiply(t, t, num) Multiply(t, t, den) Multiply(t, t, den) - Multiply(r[0], t, den) + Multiply(r, t, den) - Square(chk, r[0]) + Square(chk, r) Multiply(chk, chk, den) - if (neq25519(chk, num)) { - Multiply(r[0], r[0], I) + if (neq25519(changetype>(chk), changetype>(num))) { + Multiply(r, r, I) } - Square(chk, r[0]) + Square(chk, r) Multiply(chk, chk, den) - if (neq25519(chk, num)) { + if (neq25519(changetype>(chk), changetype>(num))) { return -1 } - if (par25519(r[0]) === (p[31] >> 7)) { - Subtract(r[0], z, r[0]) + if (par25519(r) === (p[31] >> 7)) { + Subtract(r, ZERO, r) } - Multiply(r[3], r[0], r[1]) + Multiply(r + 192, r, r + 64) return 0 } // Validate signature scalar S is canonical (S < L) function canonical (S: StaticArray): boolean { // If S >= 2^253 then S >= L for sure. - if ((S[32] & 0xE0) != 0) return false + if ((S[31] & 0xE0) != 0) return false // Check S-L for underflow (c=1) which means S < L let c = 0 for (let i = 0; i < 32; i++) { - const diff = S[i] - L[i] - c + const diff = S[i] - load(L + (i << 2)) - c c = (diff >> 31) & 1 } return c == 1 @@ -826,14 +924,14 @@ function crypto_hash (o: StaticArray, i: StaticArray): void { } const crypto_convert_h = new StaticArray(64) -const crypto_convert_p = [new StaticArray(16), new StaticArray(16), new StaticArray(16), new StaticArray(16)] +const crypto_convert_p = new StaticArray(64) function crypto_convert (pk: StaticArray, sk: StaticArray): void { const h = crypto_convert_h - const p = crypto_convert_p - p[0].fill(0) - p[1].fill(0); p[1][0] = 1 - p[2].fill(0); p[2][0] = 1 - p[3].fill(0) + const p = changetype(crypto_convert_p) + memory.fill(p, 0, 256) + store(p, 1, 64) + store(p, 1, 128) + crypto_hash(h, sk) h[0] &= 248 h[31] &= 127 @@ -844,7 +942,7 @@ function crypto_convert (pk: StaticArray, sk: StaticArray): void { const crypto_sign_d = new StaticArray(64) const crypto_sign_h = new StaticArray(64) -const crypto_sign_p = [new StaticArray(16), new StaticArray(16), new StaticArray(16), new StaticArray(16)] +const crypto_sign_p = new StaticArray(64) const crypto_sign_r = new StaticArray(64) const crypto_sign_x = new StaticArray(64) const crypto_sign_s = new StaticArray(SIGNATURE_BYTES) @@ -852,15 +950,15 @@ const crypto_sign_prv = new StaticArray(PRIVATEKEY_BYTES) function crypto_sign (sm: StaticArray, m: StaticArray, sk: StaticArray): void { const d = crypto_sign_d const h = crypto_sign_h - const p = crypto_sign_p + const p = changetype(crypto_sign_p) + memory.fill(p, 0, 256) + store(p, 1, 64) + store(p, 1, 128) + const r = crypto_sign_r const x = crypto_sign_x const s = crypto_sign_s const prv = crypto_sign_prv - p[0].fill(0) - p[1].fill(0); p[1][0] = 1 - p[2].fill(0); p[2][0] = 1 - p[3].fill(0) for (let i = 0; i < PRIVATEKEY_BYTES; i++) { prv[i] = sk[i] @@ -906,8 +1004,8 @@ function crypto_sign (sm: StaticArray, m: StaticArray, sk: StaticArray(64) -const crypto_verify_open_p = [new StaticArray(16), new StaticArray(16), new StaticArray(16), new StaticArray(16)] -const crypto_verify_open_q = [new StaticArray(16), new StaticArray(16), new StaticArray(16), new StaticArray(16)] +const crypto_verify_open_p = new StaticArray(64) +const crypto_verify_open_q = new StaticArray(64) const crypto_verify_open_t = new StaticArray(32) const crypto_verify_open_sm = new StaticArray(SIGNEDBLOCKHASH_BYTES) const crypto_verify_open_S = new StaticArray(32) @@ -916,19 +1014,17 @@ const crypto_verify_open_S = new StaticArray(32) */ function crypto_verify (h: StaticArray, s: StaticArray, k: StaticArray): bool { const d = crypto_verify_open_d - const p = crypto_verify_open_p - const q = crypto_verify_open_q + const p = changetype(crypto_verify_open_p) + memory.fill(p, 0, 256) + store(p, 1, 64) + store(p, 1, 128) + const q = changetype(crypto_verify_open_q) + memory.fill(q, 0, 256) + store(q, 1, 128) + const t = crypto_verify_open_t const sm = crypto_verify_open_sm const S = crypto_verify_open_S - p[0].fill(0) - p[1].fill(0); p[1][0] = 1 - p[2].fill(0); p[2][0] = 1 - p[3].fill(0) - q[0].fill(0) - q[1].fill(0) - q[2].fill(0); q[2][0] = 1 - q[3].fill(0) // fail if (unpackneg(q, k)) return false @@ -950,10 +1046,9 @@ function crypto_verify (h: StaticArray, s: StaticArray, k: StaticArray(q, 1, 64) + store(q, 1, 128) scalarbase(q, S) add(p, q) pack(t, p) diff --git a/index.ts b/index.ts index 736abf8..3d8f001 100644 --- a/index.ts +++ b/index.ts @@ -33,8 +33,13 @@ const NanoNaCl = async (bytes: number[]): Promise => { instance = await WebAssembly.instantiate(module, { env: { abort: (msg: any, file: any, row: any, col: any) => { - console.error('Wasm abort:', `msg ${msg}`, `file ${file}`, `row ${row}`, `col ${col}`) - throw new Error(`Wasm abort: ${msg}`) + // ~lib/builtins/abort(~lib/string/String | null?, ~lib/string/String | null?, u32?, u32?) => void + // msg = __liftString(msg >>> 0) + // file = __liftString(file >>> 0) + row = row >>> 0 + col = col >>> 0 + console.error('wasm abort:', `msg ${msg}`, `file ${file}`, `row ${row}`, `col ${col}`) + throw new Error(msg, { cause: { file, row, col } }) }, "performance.now" () { // ~lib/bindings/dom/performance.now() => f64 -- 2.47.3