* 10x 25- and 26-bit array of values representing a large integer `n mod p`.
*/
export type FieldElement = StaticArray<i32>
-export function fe (source: Array<i32> = new Array<i32>(10)): FieldElement {
- return StaticArray.fromArray<i32>(source)
+export function fe (src: Array<i32> = new Array<i32>(12)): FieldElement {
+ const dst = new StaticArray<i32>(12)
+ memory.copy(changetype<usize>(dst), src.dataStart, src.length << 2)
+ return dst
}
/**
//@ts-expect-error
@inline
export function fe_0 (h: FieldElement): void {
- memory.fill(changetype<usize>(h), 0, 40)
+ memory.fill(changetype<usize>(h), 0, 48)
}
/**
@inline
export function fe_1 (h: FieldElement): void {
fe_0(h)
- store<i32>(changetype<usize>(h), 1)
+ store<i32>(changetype<usize>(h), 1, 0)
}
/**
//@ts-expect-error
@inline
export function fe_add (h: FieldElement, f: FieldElement, g: FieldElement): void {
- const o = changetype<usize>(h)
- const a = changetype<usize>(f)
- const b = changetype<usize>(g)
-
- let ai = v128.load(a)
- let bi = v128.load(b)
- v128.store(o, v128.add<i32>(ai, bi))
-
- ai = v128.load(a, 16)
- bi = v128.load(b, 16)
- v128.store(o, v128.add<i32>(ai, bi), 16)
-
- ai = i64x2(load<i64>(a + 32), 0)
- bi = i64x2(load<i64>(b + 32), 0)
- v128.store_lane<i64>(o, v128.add<i32>(ai, bi), 0, 32)
+ const h_ptr = changetype<usize>(h)
+ const f_ptr = changetype<usize>(f)
+ const g_ptr = changetype<usize>(g)
+ v128.store(h_ptr, v128.add<i32>(v128.load(f_ptr, 0), v128.load(g_ptr, 0)), 0)
+ v128.store(h_ptr, v128.add<i32>(v128.load(f_ptr, 16), v128.load(g_ptr, 16)), 16)
+ v128.store_lane<i64>(h_ptr, v128.add<i32>(v128.load(f_ptr, 32), v128.load(g_ptr, 32)), 0, 32)
}
/**
//@ts-expect-error
@inline
export function fe_cmov (f: FieldElement, g: FieldElement, b: u64): void {
- const p = changetype<usize>(f)
- const q = changetype<usize>(g)
-
+ const f_ptr = changetype<usize>(f)
+ const g_ptr = changetype<usize>(g)
const c = v128.splat<u64>(0 - b)
-
- let pi = v128.load(p)
- let qi = v128.load(q)
- v128.store(p, v128.bitselect(qi, pi, c))
-
- pi = v128.load(p, 16)
- qi = v128.load(q, 16)
- v128.store(p, v128.bitselect(qi, pi, c), 16)
-
- pi = i64x2(load<i64>(p + 32), 0)
- qi = i64x2(load<i64>(q + 32), 0)
- v128.store_lane<u64>(p, v128.bitselect(qi, pi, c), 0, 32)
+ v128.store(f_ptr, v128.bitselect(v128.load(g_ptr), v128.load(f_ptr, 0), c))
+ v128.store(f_ptr, v128.bitselect(v128.load(g_ptr, 16), v128.load(f_ptr, 16), c), 16)
+ v128.store_lane<u64>(f_ptr, v128.bitselect(v128.load(g_ptr, 32), v128.load(f_ptr, 32), c), 0, 32)
}
/**
//@ts-expect-error
@inline
export function fe_cswap (f: FieldElement, g: FieldElement, b: u64): void {
- const p = changetype<usize>(f)
- const q = changetype<usize>(g)
-
+ const f_ptr = changetype<usize>(f)
+ const g_ptr = changetype<usize>(g)
const c = v128.splat<u64>(0 - b)
- let pi = v128.load(p)
- let qi = v128.load(q)
- v128.store(p, v128.bitselect(qi, pi, c))
- v128.store(q, v128.bitselect(pi, qi, c))
-
- pi = v128.load(p, 16)
- qi = v128.load(q, 16)
- v128.store(p, v128.bitselect(qi, pi, c), 16)
- v128.store(q, v128.bitselect(pi, qi, c), 16)
-
- pi = i64x2(load<i64>(p + 32), 0)
- qi = i64x2(load<i64>(q + 32), 0)
- v128.store_lane<i64>(p, v128.bitselect(qi, pi, c), 0, 32)
- v128.store_lane<i64>(q, v128.bitselect(pi, qi, c), 0, 32)
+ const f03 = v128.load(f_ptr)
+ const g03 = v128.load(g_ptr)
+ const f47 = v128.load(f_ptr, 16)
+ const g47 = v128.load(g_ptr, 16)
+ const f89 = v128.load(f_ptr, 32)
+ const g89 = v128.load(g_ptr, 32)
+
+ v128.store(f_ptr, v128.bitselect(g03, f03, c))
+ v128.store(g_ptr, v128.bitselect(f03, g03, c))
+ v128.store(f_ptr, v128.bitselect(g47, f47, c), 16)
+ v128.store(g_ptr, v128.bitselect(f47, g47, c), 16)
+ v128.store_lane<i64>(f_ptr, v128.bitselect(g89, f89, c), 0, 32)
+ v128.store_lane<i64>(g_ptr, v128.bitselect(f89, g89, c), 0, 32)
}
/**
//@ts-expect-error
@inline
export function fe_dbl (h: FieldElement, f: FieldElement): void {
- const o = changetype<usize>(h)
- const a = changetype<usize>(f)
-
- let ai = v128.load(a)
- v128.store(o, v128.add<i32>(ai, ai))
-
- ai = v128.load(a, 16)
- v128.store(o, v128.add<i32>(ai, ai), 16)
-
- ai = i64x2(load<i64>(a + 32), 0)
- v128.store_lane<i64>(o, v128.add<i32>(ai, ai), 0, 32)
+ const h_ptr = changetype<usize>(h)
+ const f_ptr = changetype<usize>(f)
+ v128.store(h_ptr, v128.shl<i32>(v128.load(f_ptr, 0), 1), 0)
+ v128.store(h_ptr, v128.shl<i32>(v128.load(f_ptr, 16), 1), 16)
+ v128.store_lane<i64>(h_ptr, v128.shl<i32>(v128.load(f_ptr, 32), 1), 0, 32)
}
/**
carry9 = (h9 + i64(1 << 24)) >> 25
h0 += carry9 * 19
- h9 -= carry9 * u64(1 << 25)
+ h9 -= carry9 << 25
carry1 = (h1 + i64(1 << 24)) >> 25
h2 += carry1
- h1 -= carry1 * u64(1 << 25)
+ h1 -= carry1 << 25
carry3 = (h3 + i64(1 << 24)) >> 25
h4 += carry3
- h3 -= carry3 * u64(1 << 25)
+ h3 -= carry3 << 25
carry5 = (h5 + i64(1 << 24)) >> 25
h6 += carry5
- h5 -= carry5 * u64(1 << 25)
+ h5 -= carry5 << 25
carry7 = (h7 + i64(1 << 24)) >> 25
h8 += carry7
- h7 -= carry7 * u64(1 << 25)
+ h7 -= carry7 << 25
carry0 = (h0 + i64(1 << 25)) >> 26
h1 += carry0
- h0 -= carry0 * u64(1 << 26)
+ h0 -= carry0 << 26
carry2 = (h2 + i64(1 << 25)) >> 26
h3 += carry2
- h2 -= carry2 * u64(1 << 26)
+ h2 -= carry2 << 26
carry4 = (h4 + i64(1 << 25)) >> 26
h5 += carry4
- h4 -= carry4 * u64(1 << 26)
+ h4 -= carry4 << 26
carry6 = (h6 + i64(1 << 25)) >> 26
h7 += carry6
- h6 -= carry6 * u64(1 << 26)
+ h6 -= carry6 << 26
carry8 = (h8 + i64(1 << 25)) >> 26
h9 += carry8
- h8 -= carry8 * u64(1 << 26)
-
- h[0] = i32(h0)
- h[1] = i32(h1)
- h[2] = i32(h2)
- h[3] = i32(h3)
- h[4] = i32(h4)
- h[5] = i32(h5)
- h[6] = i32(h6)
- h[7] = i32(h7)
- h[8] = i32(h8)
- h[9] = i32(h9)
+ h8 -= carry8 << 26
+
+ const h_ptr: usize = changetype<usize>(h)
+ store<i32>(h_ptr, h0, 0)
+ store<i32>(h_ptr, h1, 4)
+ store<i32>(h_ptr, h2, 8)
+ store<i32>(h_ptr, h3, 12)
+ store<i32>(h_ptr, h4, 16)
+ store<i32>(h_ptr, h5, 20)
+ store<i32>(h_ptr, h6, 24)
+ store<i32>(h_ptr, h7, 28)
+ store<i32>(h_ptr, h8, 32)
+ store<i32>(h_ptr, h9, 36)
}
const t0: FieldElement = fe()
fe_mul(out, t1, t0)
}
-const fe_isnegative_s = new StaticArray<u8>(32)
+const fe_isnegative_t = fe()
/**
* return 1 if f is in {1,3,5,...,q-2}
* return 0 if f is in {0,2,4,...,q-1}
//@ts-expect-error
@inline
export function fe_isnegative (f: FieldElement): u8 {
- const s = fe_isnegative_s
- fe_tobytes(s, f)
- return s[0] & 1
+ const t = fe_isnegative_t
+ fe_reduce(t, f)
+ return load<u8>(changetype<usize>(t), 0) & 1
}
const fe_iszero_s = new StaticArray<u8>(32)
return sodium_is_zero(s, 32)
}
-const multiply_t = new StaticArray<i64>(24)
/**
* Multiply two FieldElements and store the product.
*
* @param g FieldElement multiplicand source
*/
export function fe_mul (h: FieldElement, f: FieldElement, g: FieldElement): void {
- const o = changetype<usize>(h)
- const a = changetype<usize>(f)
- const b = changetype<usize>(g)
-
- const t = changetype<usize>(multiply_t)
- memory.fill(t, 0, 160)
-
- const b0: v128 = v128.load(b)
- const b4: v128 = v128.load(b + 16)
- const b8: v128 = i32x4(load<i32>(b + 32), load<i32>(b + 36), 0, 0)
-
- for (let i = 0; i < 10; i++) {
- const odd = (i & 1) + 1
- const ai = v128.splat<i32>(load<i32>(a + (i << 2)))
-
- let ptr = t + (usize(i) << 3)
- let pLo = i64x2.extmul_low_i32x4_s(ai, b0)
- let pHi = i64x2.extmul_high_i32x4_s(ai, b0)
- pLo = v128.mul<i64>(pLo, i64x2(1, odd))
- pHi = v128.mul<i64>(pHi, i64x2(1, odd))
- let tLo = v128.load(ptr)
- let tHi = v128.load(ptr + 16)
- tLo = i64x2.add(tLo, pLo)
- tHi = i64x2.add(tHi, pHi)
- v128.store(ptr, tLo)
- v128.store(ptr + 16, tHi)
-
- ptr += 32
- pLo = i64x2.extmul_low_i32x4_s(ai, b4)
- pHi = i64x2.extmul_high_i32x4_s(ai, b4)
- pLo = v128.mul<i64>(pLo, i64x2(1, odd))
- pHi = v128.mul<i64>(pHi, i64x2(1, odd))
- tLo = v128.load(ptr)
- tHi = v128.load(ptr + 16)
- tLo = i64x2.add(tLo, pLo)
- tHi = i64x2.add(tHi, pHi)
- v128.store(ptr, tLo)
- v128.store(ptr + 16, tHi)
-
- ptr += 32
- pLo = i64x2.extmul_low_i32x4_s(ai, b8)
- pHi = i64x2.extmul_high_i32x4_s(ai, b8)
- pLo = v128.mul<i64>(pLo, i64x2(1, odd))
- pHi = v128.mul<i64>(pHi, i64x2(1, odd))
- tLo = v128.load(ptr)
- tHi = v128.load(ptr + 16)
- tLo = i64x2.add(tLo, pLo)
- tHi = i64x2.add(tHi, pHi)
- v128.store(ptr, tLo)
- v128.store(ptr + 16, tHi)
- }
- // discard last 4 elements since we do not actually need them
- memory.fill(t + 160, 0, 32)
- normalize(o, t)
+ const f_ptr: usize = changetype<usize>(f)
+ const g_ptr: usize = changetype<usize>(g)
+ const g0123: v128 = v128.load(g_ptr, 0)
+ const g4567: v128 = v128.load(g_ptr, 16)
+ const g89xx: v128 = v128.load(g_ptr, 32)
+
+ // mask to select even lane from first vector and odd lane from second
+ const m: v128 = i32x4(-1, 0, -1, 0)
+
+ // mask to double odd-on-odd indexes
+ const odds: v128 = i32x4(0, -1, 0, -1)
+
+ // factor of 19 for wrap from h9 to h0
+ const v19: v128 = i32x4.splat(19)
+ const g0123_19: v128 = i32x4.mul(g0123, v19)
+ const g4567_19: v128 = i32x4.mul(g4567, v19)
+ const g89xx_19: v128 = i32x4.mul(g89xx, v19)
+
+ // f[0]
+ let fi: i32 = load<i32>(f_ptr, 0)
+ let fv: v128 = i32x4.splat(fi)
+
+ let h01: v128 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ let h23: v128 = i64x2.extmul_high_i32x4_s(fv, g0123)
+ let h45: v128 = i64x2.extmul_low_i32x4_s(fv, g4567)
+ let h67: v128 = i64x2.extmul_high_i32x4_s(fv, g4567)
+ let h89: v128 = i64x2.extmul_low_i32x4_s(fv, g89xx)
+
+ // f[1]
+ fi = load<i32>(f_ptr, 4)
+ fv = i32x4.splat(fi)
+ fv = i32x4.add(fv, v128.and(fv, odds))
+
+ let h12: v128 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ let h34: v128 = i64x2.extmul_high_i32x4_s(fv, g0123)
+ let h56: v128 = i64x2.extmul_low_i32x4_s(fv, g4567)
+ let h78: v128 = i64x2.extmul_high_i32x4_s(fv, g4567)
+ let h90: v128 = i64x2.extmul_low_i32x4_s(fv, v128.bitselect(g89xx, g89xx_19, m))
+
+ // f[2]
+ fi = load<i32>(f_ptr, 8)
+ fv = i32x4.splat(fi)
+
+ let t0: v128 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ let t1: v128 = i64x2.extmul_high_i32x4_s(fv, g0123)
+ let t2: v128 = i64x2.extmul_low_i32x4_s(fv, g4567)
+ let t3: v128 = i64x2.extmul_high_i32x4_s(fv, g4567)
+ let t4: v128 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h23 = i64x2.add(h23, t0)
+ h45 = i64x2.add(h45, t1)
+ h67 = i64x2.add(h67, t2)
+ h89 = i64x2.add(h89, t3)
+ h01 = i64x2.add(h01, t4)
+
+ // f[3]
+ fi = load<i32>(f_ptr, 12)
+ fv = i32x4.splat(fi)
+ fv = i32x4.add(fv, v128.and(fv, odds))
+
+ t0 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ t1 = i64x2.extmul_high_i32x4_s(fv, g0123)
+ t2 = i64x2.extmul_low_i32x4_s(fv, g4567)
+ t3 = i64x2.extmul_high_i32x4_s(fv, v128.bitselect(g4567, g4567_19, m))
+ t4 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h34 = i64x2.add(h34, t0)
+ h56 = i64x2.add(h56, t1)
+ h78 = i64x2.add(h78, t2)
+ h90 = i64x2.add(h90, t3)
+ h12 = i64x2.add(h12, t4)
+
+ // f[4]
+ fi = load<i32>(f_ptr, 16)
+ fv = i32x4.splat(fi)
+
+ t0 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ t1 = i64x2.extmul_high_i32x4_s(fv, g0123)
+ t2 = i64x2.extmul_low_i32x4_s(fv, g4567)
+ t3 = i64x2.extmul_high_i32x4_s(fv, g4567_19)
+ t4 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h45 = i64x2.add(h45, t0)
+ h67 = i64x2.add(h67, t1)
+ h89 = i64x2.add(h89, t2)
+ h01 = i64x2.add(h01, t3)
+ h23 = i64x2.add(h23, t4)
+
+ // f[5]
+ fi = load<i32>(f_ptr, 20)
+ fv = i32x4.splat(fi)
+ fv = i32x4.add(fv, v128.and(fv, odds))
+
+ t0 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ t1 = i64x2.extmul_high_i32x4_s(fv, g0123)
+ t2 = i64x2.extmul_low_i32x4_s(fv, v128.bitselect(g4567, g4567_19, m))
+ t3 = i64x2.extmul_high_i32x4_s(fv, g4567_19)
+ t4 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h56 = i64x2.add(h56, t0)
+ h78 = i64x2.add(h78, t1)
+ h90 = i64x2.add(h90, t2)
+ h12 = i64x2.add(h12, t3)
+ h34 = i64x2.add(h34, t4)
+
+ // f[6]
+ fi = load<i32>(f_ptr, 24)
+ fv = i32x4.splat(fi)
+
+ t0 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ t1 = i64x2.extmul_high_i32x4_s(fv, g0123)
+ t2 = i64x2.extmul_low_i32x4_s(fv, g4567_19)
+ t3 = i64x2.extmul_high_i32x4_s(fv, g4567_19)
+ t4 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h67 = i64x2.add(h67, t0)
+ h89 = i64x2.add(h89, t1)
+ h01 = i64x2.add(h01, t2)
+ h23 = i64x2.add(h23, t3)
+ h45 = i64x2.add(h45, t4)
+
+ // f[7]
+ fi = load<i32>(f_ptr, 28)
+ fv = i32x4.splat(fi)
+ fv = i32x4.add(fv, v128.and(fv, odds))
+
+ t0 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ t1 = i64x2.extmul_high_i32x4_s(fv, v128.bitselect(g0123, g0123_19, m))
+ t2 = i64x2.extmul_low_i32x4_s(fv, g4567_19)
+ t3 = i64x2.extmul_high_i32x4_s(fv, g4567_19)
+ t4 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h78 = i64x2.add(h78, t0)
+ h90 = i64x2.add(h90, t1)
+ h12 = i64x2.add(h12, t2)
+ h34 = i64x2.add(h34, t3)
+ h56 = i64x2.add(h56, t4)
+
+ // f[8]
+ fi = load<i32>(f_ptr, 32)
+ fv = i32x4.splat(fi)
+
+ t0 = i64x2.extmul_low_i32x4_s(fv, g0123)
+ t1 = i64x2.extmul_high_i32x4_s(fv, g0123_19)
+ t2 = i64x2.extmul_low_i32x4_s(fv, g4567_19)
+ t3 = i64x2.extmul_high_i32x4_s(fv, g4567_19)
+ t4 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h89 = i64x2.add(h89, t0)
+ h01 = i64x2.add(h01, t1)
+ h23 = i64x2.add(h23, t2)
+ h45 = i64x2.add(h45, t3)
+ h67 = i64x2.add(h67, t4)
+
+ // f[9]
+ fi = load<i32>(f_ptr, 36)
+ fv = i32x4.splat(fi)
+ fv = i32x4.add(fv, v128.and(fv, odds))
+
+ t0 = i64x2.extmul_low_i32x4_s(fv, v128.bitselect(g0123, g0123_19, m))
+ t1 = i64x2.extmul_high_i32x4_s(fv, g0123_19)
+ t2 = i64x2.extmul_low_i32x4_s(fv, g4567_19)
+ t3 = i64x2.extmul_high_i32x4_s(fv, g4567_19)
+ t4 = i64x2.extmul_low_i32x4_s(fv, g89xx_19)
+
+ h90 = i64x2.add(h90, t0)
+ h12 = i64x2.add(h12, t1)
+ h34 = i64x2.add(h34, t2)
+ h56 = i64x2.add(h56, t3)
+ h78 = i64x2.add(h78, t4)
+
+ // extract scalars from vectors
+ let h0: i64 = i64x2.extract_lane(h90, 1) + i64x2.extract_lane(h01, 0)
+ let h1: i64 = i64x2.extract_lane(h01, 1) + i64x2.extract_lane(h12, 0)
+ let h2: i64 = i64x2.extract_lane(h12, 1) + i64x2.extract_lane(h23, 0)
+ let h3: i64 = i64x2.extract_lane(h23, 1) + i64x2.extract_lane(h34, 0)
+ let h4: i64 = i64x2.extract_lane(h34, 1) + i64x2.extract_lane(h45, 0)
+ let h5: i64 = i64x2.extract_lane(h45, 1) + i64x2.extract_lane(h56, 0)
+ let h6: i64 = i64x2.extract_lane(h56, 1) + i64x2.extract_lane(h67, 0)
+ let h7: i64 = i64x2.extract_lane(h67, 1) + i64x2.extract_lane(h78, 0)
+ let h8: i64 = i64x2.extract_lane(h78, 1) + i64x2.extract_lane(h89, 0)
+ let h9: i64 = i64x2.extract_lane(h89, 1) + i64x2.extract_lane(h90, 0)
+
+ // propagate carries to bring each limb back into its 26/25-bit range
+ let carry0: i64
+ let carry1: i64
+ let carry2: i64
+ let carry3: i64
+ let carry4: i64
+ let carry5: i64
+ let carry6: i64
+ let carry7: i64
+ let carry8: i64
+ let carry9: i64
+
+ carry0 = (h0 + (1 << 25)) >> 26
+ h1 += carry0
+ h0 -= carry0 << 26
+ carry4 = (h4 + (1 << 25)) >> 26
+ h5 += carry4
+ h4 -= carry4 << 26
+
+ carry1 = (h1 + (1 << 24)) >> 25
+ h2 += carry1
+ h1 -= carry1 << 25
+ carry5 = (h5 + (1 << 24)) >> 25
+ h6 += carry5
+ h5 -= carry5 << 25
+
+ carry2 = (h2 + (1 << 25)) >> 26
+ h3 += carry2
+ h2 -= carry2 << 26
+ carry6 = (h6 + (1 << 25)) >> 26
+ h7 += carry6
+ h6 -= carry6 << 26
+
+ carry3 = (h3 + (1 << 24)) >> 25
+ h4 += carry3
+ h3 -= carry3 << 25
+ carry7 = (h7 + (1 << 24)) >> 25
+ h8 += carry7
+ h7 -= carry7 << 25
+
+ carry4 = (h4 + (1 << 25)) >> 26
+ h5 += carry4
+ h4 -= carry4 << 26
+ carry8 = (h8 + (1 << 25)) >> 26
+ h9 += carry8
+ h8 -= carry8 << 26
+
+ carry9 = (h9 + (1 << 24)) >> 25
+ h0 += carry9 * 19
+ h9 -= carry9 << 25
+
+ carry0 = (h0 + (1 << 25)) >> 26
+ h1 += carry0
+ h0 -= carry0 << 26
+
+ // assign results to output
+ const h_ptr: usize = changetype<usize>(h)
+ store<i32>(h_ptr, h0, 0)
+ store<i32>(h_ptr, h1, 4)
+ store<i32>(h_ptr, h2, 8)
+ store<i32>(h_ptr, h3, 12)
+ store<i32>(h_ptr, h4, 16)
+ store<i32>(h_ptr, h5, 20)
+ store<i32>(h_ptr, h6, 24)
+ store<i32>(h_ptr, h7, 28)
+ store<i32>(h_ptr, h8, 32)
+ store<i32>(h_ptr, h9, 36)
}
/**
//@ts-expect-error
@inline
export function fe_neg (h: FieldElement, f: FieldElement): void {
- const o = changetype<usize>(h)
- const a = changetype<usize>(f)
-
- let ai = v128.load(a)
- v128.store(o, v128.neg<i32>(ai))
-
- ai = v128.load(a, 16)
- v128.store(o, v128.neg<i32>(ai), 16)
-
- ai = i64x2(load<i64>(a + 32), 0)
- v128.store_lane<i64>(o, v128.neg<i32>(ai), 0, 32)
+ const h_ptr = changetype<usize>(h)
+ const f_ptr = changetype<usize>(f)
+ v128.store(h_ptr, v128.neg<i32>(v128.load(f_ptr)))
+ v128.store(h_ptr, v128.neg<i32>(v128.load(f_ptr, 16)), 16)
+ v128.store_lane<i64>(h_ptr, v128.neg<i32>(v128.load(f_ptr, 32)), 0, 32)
}
const fe_pow22523_t0: FieldElement = fe()
* so floor(2⁻²⁵⁵(h + 19 2^(-25) h9 + 2^(-1))) = q.
*/
export function fe_reduce (h: FieldElement, f: FieldElement): void {
- let h0 = f[0]
- let h1 = f[1]
- let h2 = f[2]
- let h3 = f[3]
- let h4 = f[4]
- let h5 = f[5]
- let h6 = f[6]
- let h7 = f[7]
- let h8 = f[8]
- let h9 = f[9]
+ const f_ptr: usize = changetype<usize>(f)
+ let f0: i32 = load<i32>(f_ptr, 0)
+ let f1: i32 = load<i32>(f_ptr, 4)
+ let f2: i32 = load<i32>(f_ptr, 8)
+ let f3: i32 = load<i32>(f_ptr, 12)
+ let f4: i32 = load<i32>(f_ptr, 16)
+ let f5: i32 = load<i32>(f_ptr, 20)
+ let f6: i32 = load<i32>(f_ptr, 24)
+ let f7: i32 = load<i32>(f_ptr, 28)
+ let f8: i32 = load<i32>(f_ptr, 32)
+ let f9: i32 = load<i32>(f_ptr, 36)
let q: i32
- let carry0: i32, carry1: i32, carry2: i32, carry3: i32, carry4: i32, carry5: i32, carry6: i32, carry7: i32, carry8: i32, carry9: i32
-
- q = (19 * h9 + (u32(1) << 24)) >> 25
- q = (h0 + q) >> 26
- q = (h1 + q) >> 25
- q = (h2 + q) >> 26
- q = (h3 + q) >> 25
- q = (h4 + q) >> 26
- q = (h5 + q) >> 25
- q = (h6 + q) >> 26
- q = (h7 + q) >> 25
- q = (h8 + q) >> 26
- q = (h9 + q) >> 25
+ let c0: i32, c1: i32, c2: i32, c3: i32, c4: i32, c5: i32, c6: i32, c7: i32, c8: i32, c9: i32
+
+ q = (19 * f9 + (1 << 24)) >> 25
+ q = (f0 + q) >> 26
+ q = (f1 + q) >> 25
+ q = (f2 + q) >> 26
+ q = (f3 + q) >> 25
+ q = (f4 + q) >> 26
+ q = (f5 + q) >> 25
+ q = (f6 + q) >> 26
+ q = (f7 + q) >> 25
+ q = (f8 + q) >> 26
+ q = (f9 + q) >> 25
/* Goal: Output h-(2²⁵⁵-19)q, which is between 0 and 2²⁵⁵-20. */
- h0 += 19 * q
+ f0 += 19 * q
/* Goal: Output h-2²⁵⁵ q, which is between 0 and 2²⁵⁵-20. */
- carry0 = h0 >> 26
+ c0 = f0 >> 26
+ f1 += c0
+ f0 -= c0 << 26
+ c1 = f1 >> 25
+ f2 += c1
+ f1 -= c1 << 25
+ c2 = f2 >> 26
+ f3 += c2
+ f2 -= c2 << 26
+ c3 = f3 >> 25
+ f4 += c3
+ f3 -= c3 << 25
+ c4 = f4 >> 26
+ f5 += c4
+ f4 -= c4 << 26
+ c5 = f5 >> 25
+ f6 += c5
+ f5 -= c5 << 25
+ c6 = f6 >> 26
+ f7 += c6
+ f6 -= c6 << 26
+ c7 = f7 >> 25
+ f8 += c7
+ f7 -= c7 << 25
+ c8 = f8 >> 26
+ f9 += c8
+ f8 -= c8 << 26
+ c9 = f9 >> 25
+ f9 -= c9 << 25
+
+ const h_ptr: usize = changetype<usize>(h)
+ store<i32>(h_ptr, f0, 0)
+ store<i32>(h_ptr, f1, 4)
+ store<i32>(h_ptr, f2, 8)
+ store<i32>(h_ptr, f3, 12)
+ store<i32>(h_ptr, f4, 16)
+ store<i32>(h_ptr, f5, 20)
+ store<i32>(h_ptr, f6, 24)
+ store<i32>(h_ptr, f7, 28)
+ store<i32>(h_ptr, f8, 32)
+ store<i32>(h_ptr, f9, 36)
+}
+
+/**
+ * Square a FieldElement and store the result.
+ *
+ * Some iterations of the inner loop can be skipped since
+ * `f[x]*f[y] + f[y]*f[x] = 2*f[x]*f[y]`
+ *
+ * ```
+ * // non-constant-time example
+ * for (let i = 0; i < 10; i++) {
+ *   for (let j = i; j < 10; j++) {
+ *     let p = f[i] * f[j]
+ *     if (i < j) {
+ *       p *= 2
+ *     }
+ *     h[i + j] += p
+ *   }
+ * }
+ * ```
+ *
+ * h = f * f
+ * Can overlap h with f.
+ *
+ * Preconditions:
+ * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+ *
+ * @param h FieldElement power destination
+ * @param f FieldElement base source
+ */
+export function fe_sq (h: FieldElement, f: FieldElement): void {
+ const f_ptr: usize = changetype<usize>(f)
+ const f0: i64 = i64(load<i32>(f_ptr, 0))
+ const f1: i64 = i64(load<i32>(f_ptr, 4))
+ const f2: i64 = i64(load<i32>(f_ptr, 8))
+ const f3: i64 = i64(load<i32>(f_ptr, 12))
+ const f4: i64 = i64(load<i32>(f_ptr, 16))
+ const f5: i64 = i64(load<i32>(f_ptr, 20))
+ const f6: i64 = i64(load<i32>(f_ptr, 24))
+ const f7: i64 = i64(load<i32>(f_ptr, 28))
+ const f8: i64 = i64(load<i32>(f_ptr, 32))
+ const f9: i64 = i64(load<i32>(f_ptr, 36))
+
+ const f0_2: i64 = f0 * 2
+ const f1_2: i64 = f1 * 2
+ const f2_2: i64 = f2 * 2
+ const f3_2: i64 = f3 * 2
+ const f4_2: i64 = f4 * 2
+ const f5_2: i64 = f5 * 2
+ const f6_2: i64 = f6 * 2
+ const f7_2: i64 = f7 * 2
+
+ const f5_19: i64 = f5 * 19 /* 1.959375*2^29 */
+ const f6_19: i64 = f6 * 19 /* 1.959375*2^30 */
+ const f7_19: i64 = f7 * 19 /* 1.959375*2^29 */
+ const f8_19: i64 = f8 * 19 /* 1.959375*2^30 */
+ const f9_19: i64 = f9 * 19 /* 1.959375*2^29 */
+
+ const f7_38: i64 = f7 * 38 /* 1.959375*2^30 */
+ const f9_38: i64 = f9 * 38 /* 1.959375*2^30 */
+
+ let h0: i64 = f0 * f0
+ let h1: i64 = f0_2 * f1
+ let h2: i64 = f0_2 * f2
+ let h3: i64 = f0_2 * f3
+ let h4: i64 = f0_2 * f4
+ let h5: i64 = f0_2 * f5
+ let h6: i64 = f0_2 * f6
+ let h7: i64 = f0_2 * f7
+ let h8: i64 = f0_2 * f8
+ let h9: i64 = f0_2 * f9
+
+ h2 += f1_2 * f1
+ h3 += f1_2 * f2
+ h4 += f1_2 * f3_2
+ h5 += f1_2 * f4
+ h6 += f1_2 * f5_2
+ h7 += f1_2 * f6
+ h8 += f1_2 * f7_2
+ h9 += f1_2 * f8
+ h0 += f1_2 * f9_38
+
+ h4 += f2 * f2
+ h5 += f2_2 * f3
+ h6 += f2_2 * f4
+ h7 += f2_2 * f5
+ h8 += f2_2 * f6
+ h9 += f2_2 * f7
+ h0 += f2_2 * f8_19
+ h1 += f2_2 * f9_19
+
+ h6 += f3_2 * f3
+ h7 += f3_2 * f4
+ h8 += f3_2 * f5_2
+ h9 += f3_2 * f6
+ h0 += f3_2 * f7_38
+ h1 += f3_2 * f8_19
+ h2 += f3_2 * f9_38
+
+ h8 += f4 * f4
+ h9 += f4_2 * f5
+ h0 += f4_2 * f6_19
+ h1 += f4_2 * f7_19
+ h2 += f4_2 * f8_19
+ h3 += f4_2 * f9_19
+
+ h0 += f5_2 * f5_19
+ h1 += f5_2 * f6_19
+ h2 += f5_2 * f7_38
+ h3 += f5_2 * f8_19
+ h4 += f5_2 * f9_38
+
+ h2 += f6 * f6_19
+ h3 += f6_2 * f7_19
+ h4 += f6_2 * f8_19
+ h5 += f6_2 * f9_19
+
+ h4 += f7_2 * f7_19
+ h5 += f7_2 * f8_19
+ h6 += f7_2 * f9_38
+
+ h6 += f8 * f8_19
+ h7 += f8 * f9_38
+
+ h8 += f9 * f9_38
+
+ let carry0: i64
+ let carry1: i64
+ let carry2: i64
+ let carry3: i64
+ let carry4: i64
+ let carry5: i64
+ let carry6: i64
+ let carry7: i64
+ let carry8: i64
+ let carry9: i64
+
+ carry0 = (h0 + i64(1 << 25)) >> 26
h1 += carry0
- h0 -= carry0 * u32(1 << 26)
- carry1 = h1 >> 25
- h2 += carry1
- h1 -= carry1 * u32(1 << 25)
- carry2 = h2 >> 26
- h3 += carry2
- h2 -= carry2 * u32(1 << 26)
- carry3 = h3 >> 25
- h4 += carry3
- h3 -= carry3 * u32(1 << 25)
- carry4 = h4 >> 26
+ h0 -= carry0 << 26
+ carry4 = (h4 + i64(1 << 25)) >> 26
h5 += carry4
- h4 -= carry4 * u32(1 << 26)
- carry5 = h5 >> 25
+ h4 -= carry4 << 26
+
+ carry1 = (h1 + i64(1 << 24)) >> 25
+ h2 += carry1
+ h1 -= carry1 << 25
+ carry5 = (h5 + i64(1 << 24)) >> 25
h6 += carry5
- h5 -= carry5 * u32(1 << 25)
- carry6 = h6 >> 26
+ h5 -= carry5 << 25
+
+ carry2 = (h2 + i64(1 << 25)) >> 26
+ h3 += carry2
+ h2 -= carry2 << 26
+ carry6 = (h6 + i64(1 << 25)) >> 26
h7 += carry6
- h6 -= carry6 * u32(1 << 26)
- carry7 = h7 >> 25
+ h6 -= carry6 << 26
+
+ carry3 = (h3 + i64(1 << 24)) >> 25
+ h4 += carry3
+ h3 -= carry3 << 25
+ carry7 = (h7 + i64(1 << 24)) >> 25
h8 += carry7
- h7 -= carry7 * u32(1 << 25)
- carry8 = h8 >> 26
+ h7 -= carry7 << 25
+
+ carry4 = (h4 + i64(1 << 25)) >> 26
+ h5 += carry4
+ h4 -= carry4 << 26
+ carry8 = (h8 + i64(1 << 25)) >> 26
h9 += carry8
- h8 -= carry8 * u32(1 << 26)
- carry9 = h9 >> 25
- h9 -= carry9 * u32(1 << 25)
-
- h[0] = h0
- h[1] = h1
- h[2] = h2
- h[3] = h3
- h[4] = h4
- h[5] = h5
- h[6] = h6
- h[7] = h7
- h[8] = h8
- h[9] = h9
+ h8 -= carry8 << 26
+
+ carry9 = (h9 + i64(1 << 24)) >> 25
+ h0 += carry9 * 19
+ h9 -= carry9 << 25
+
+ carry0 = (h0 + i64(1 << 25)) >> 26
+ h1 += carry0
+ h0 -= carry0 << 26
+
+ const h_ptr: usize = changetype<usize>(h)
+ store<i32>(h_ptr, h0, 0)
+ store<i32>(h_ptr, h1, 4)
+ store<i32>(h_ptr, h2, 8)
+ store<i32>(h_ptr, h3, 12)
+ store<i32>(h_ptr, h4, 16)
+ store<i32>(h_ptr, h5, 20)
+ store<i32>(h_ptr, h6, 24)
+ store<i32>(h_ptr, h7, 28)
+ store<i32>(h_ptr, h8, 32)
+ store<i32>(h_ptr, h9, 36)
}
/**
* Square a FieldElement and store the result.
*
- * @param h FieldElement power destination
- * @param f FieldElement base source
+ * Some iterations of the inner loop can be skipped since
+ * `f[x]*f[y] + f[y]*f[x] = 2*f[x]*f[y]`
+ *
+ * ```
+ * // non-constant-time example
+ * for (let i = 0; i < 10; i++) {
+ *   for (let j = i; j < 10; j++) {
+ *     let p = f[i] * f[j]
+ *     if (i < j) {
+ *       p *= 2
+ *     }
+ *     h[i + j] += p
+ *   }
+ * }
+ * ```
+ *
+ * h = f * f
+ * Can overlap h with f.
+ *
+ * Preconditions:
+ * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+ *
+ * @param hx FieldElement power destination
+ * @param fx FieldElement base source
*/
-//@ts-expect-error
-@inline
-export function fe_sq (h: FieldElement, f: FieldElement): void {
- fe_mul(h, f, f)
+export function fe_sq_vec (hx: FieldElement, fx: FieldElement, hy: FieldElement, fy: FieldElement): void {
+ const fx_ptr: usize = changetype<usize>(fx)
+ const fy_ptr: usize = changetype<usize>(fy)
+ const f0: v128 = i32x4(load<i32>(fx_ptr, 0), load<i32>(fy_ptr, 0), 0, 0)
+ const f1: v128 = i32x4(load<i32>(fx_ptr, 4), load<i32>(fy_ptr, 4), 0, 0)
+ const f2: v128 = i32x4(load<i32>(fx_ptr, 8), load<i32>(fy_ptr, 8), 0, 0)
+ const f3: v128 = i32x4(load<i32>(fx_ptr, 12), load<i32>(fy_ptr, 12), 0, 0)
+ const f4: v128 = i32x4(load<i32>(fx_ptr, 16), load<i32>(fy_ptr, 16), 0, 0)
+ const f5: v128 = i32x4(load<i32>(fx_ptr, 20), load<i32>(fy_ptr, 20), 0, 0)
+ const f6: v128 = i32x4(load<i32>(fx_ptr, 24), load<i32>(fy_ptr, 24), 0, 0)
+ const f7: v128 = i32x4(load<i32>(fx_ptr, 28), load<i32>(fy_ptr, 28), 0, 0)
+ const f8: v128 = i32x4(load<i32>(fx_ptr, 32), load<i32>(fy_ptr, 32), 0, 0)
+ const f9: v128 = i32x4(load<i32>(fx_ptr, 36), load<i32>(fy_ptr, 36), 0, 0)
+
+ const v19: v128 = i32x4.splat(19)
+ const f5_19: v128 = i32x4.mul(f5, v19) /* 1.959375*2^29 */
+ const f6_19: v128 = i32x4.mul(f6, v19) /* 1.959375*2^30 */
+ const f7_19: v128 = i32x4.mul(f7, v19) /* 1.959375*2^29 */
+ const f8_19: v128 = i32x4.mul(f8, v19) /* 1.959375*2^30 */
+ const f9_19: v128 = i32x4.mul(f9, v19) /* 1.959375*2^29 */
+
+ // f[0]
+ let f_2: v128 = i32x4.shl(f0, 1)
+
+ let h0: v128 = i64x2.extmul_low_i32x4_s(f0, f0)
+ let h1: v128 = i64x2.extmul_low_i32x4_s(f_2, f1)
+ let h2: v128 = i64x2.extmul_low_i32x4_s(f_2, f2)
+ let h3: v128 = i64x2.extmul_low_i32x4_s(f_2, f3)
+ let h4: v128 = i64x2.extmul_low_i32x4_s(f_2, f4)
+ let h5: v128 = i64x2.extmul_low_i32x4_s(f_2, f5)
+ let h6: v128 = i64x2.extmul_low_i32x4_s(f_2, f6)
+ let h7: v128 = i64x2.extmul_low_i32x4_s(f_2, f7)
+ let h8: v128 = i64x2.extmul_low_i32x4_s(f_2, f8)
+ let h9: v128 = i64x2.extmul_low_i32x4_s(f_2, f9)
+
+ // f[1]
+ f_2 = i32x4.shl(f1, 1)
+ let f_4: v128 = i32x4.shl(f1, 2)
+
+ let t0: v128 = i64x2.extmul_low_i32x4_s(f_2, f1)
+ let t1: v128 = i64x2.extmul_low_i32x4_s(f_2, f2)
+ let t2: v128 = i64x2.extmul_low_i32x4_s(f_4, f3)
+ let t3: v128 = i64x2.extmul_low_i32x4_s(f_2, f4)
+ let t4: v128 = i64x2.extmul_low_i32x4_s(f_4, f5)
+ let t5: v128 = i64x2.extmul_low_i32x4_s(f_2, f6)
+ let t6: v128 = i64x2.extmul_low_i32x4_s(f_4, f7)
+ let t7: v128 = i64x2.extmul_low_i32x4_s(f_2, f8)
+ let t8: v128 = i64x2.extmul_low_i32x4_s(f_4, f9_19)
+
+ h2 = i64x2.add(h2, t0)
+ h3 = i64x2.add(h3, t1)
+ h4 = i64x2.add(h4, t2)
+ h5 = i64x2.add(h5, t3)
+ h6 = i64x2.add(h6, t4)
+ h7 = i64x2.add(h7, t5)
+ h8 = i64x2.add(h8, t6)
+ h9 = i64x2.add(h9, t7)
+ h0 = i64x2.add(h0, t8)
+
+ // f[2]
+ f_2 = i32x4.shl(f2, 1)
+
+ t0 = i64x2.extmul_low_i32x4_s(f2, f2)
+ t1 = i64x2.extmul_low_i32x4_s(f_2, f3)
+ t2 = i64x2.extmul_low_i32x4_s(f_2, f4)
+ t3 = i64x2.extmul_low_i32x4_s(f_2, f5)
+ t4 = i64x2.extmul_low_i32x4_s(f_2, f6)
+ t5 = i64x2.extmul_low_i32x4_s(f_2, f7)
+ t6 = i64x2.extmul_low_i32x4_s(f_2, f8_19)
+ t7 = i64x2.extmul_low_i32x4_s(f_2, f9_19)
+
+ h4 = i64x2.add(h4, t0)
+ h5 = i64x2.add(h5, t1)
+ h6 = i64x2.add(h6, t2)
+ h7 = i64x2.add(h7, t3)
+ h8 = i64x2.add(h8, t4)
+ h9 = i64x2.add(h9, t5)
+ h0 = i64x2.add(h0, t6)
+ h1 = i64x2.add(h1, t7)
+
+ // f[3]
+ f_2 = i32x4.shl(f3, 1)
+ f_4 = i32x4.shl(f3, 2)
+
+ t0 = i64x2.extmul_low_i32x4_s(f_2, f3)
+ t1 = i64x2.extmul_low_i32x4_s(f_2, f4)
+ t2 = i64x2.extmul_low_i32x4_s(f_4, f5)
+ t3 = i64x2.extmul_low_i32x4_s(f_2, f6)
+ t4 = i64x2.extmul_low_i32x4_s(f_4, f7_19) /* 1.959375*2^30 */
+ t5 = i64x2.extmul_low_i32x4_s(f_2, f8_19)
+ t6 = i64x2.extmul_low_i32x4_s(f_4, f9_19)
+
+ h6 = i64x2.add(h6, t0)
+ h7 = i64x2.add(h7, t1)
+ h8 = i64x2.add(h8, t2)
+ h9 = i64x2.add(h9, t3)
+ h0 = i64x2.add(h0, t4) /* 1.959375*2^30 */
+ h1 = i64x2.add(h1, t5)
+ h2 = i64x2.add(h2, t6)
+
+ // f[4]
+ f_2 = i32x4.shl(f4, 1)
+
+ t0 = i64x2.extmul_low_i32x4_s(f4, f4)
+ t1 = i64x2.extmul_low_i32x4_s(f_2, f5)
+ t2 = i64x2.extmul_low_i32x4_s(f_2, f6_19)
+ t3 = i64x2.extmul_low_i32x4_s(f_2, f7_19)
+ t4 = i64x2.extmul_low_i32x4_s(f_2, f8_19)
+ t5 = i64x2.extmul_low_i32x4_s(f_2, f9_19)
+
+ h8 = i64x2.add(h8, t0)
+ h9 = i64x2.add(h9, t1)
+ h0 = i64x2.add(h0, t2)
+ h1 = i64x2.add(h1, t3)
+ h2 = i64x2.add(h2, t4)
+ h3 = i64x2.add(h3, t5)
+
+ // f[5]
+ f_2 = i32x4.shl(f5, 1)
+ f_4 = i32x4.shl(f5, 2)
+
+ t0 = i64x2.extmul_low_i32x4_s(f_2, f5_19)
+ t1 = i64x2.extmul_low_i32x4_s(f_2, f6_19)
+ t2 = i64x2.extmul_low_i32x4_s(f_4, f7_19)
+ t3 = i64x2.extmul_low_i32x4_s(f_2, f8_19)
+ t4 = i64x2.extmul_low_i32x4_s(f_4, f9_19) /* 1.959375*2^30 */
+
+ h0 = i64x2.add(h0, t0)
+ h1 = i64x2.add(h1, t1)
+ h2 = i64x2.add(h2, t2)
+ h3 = i64x2.add(h3, t3)
+ h4 = i64x2.add(h4, t4) /* 1.959375*2^30 */
+
+ // f[6]
+ f_2 = i32x4.shl(f6, 1)
+
+ t0 = i64x2.extmul_low_i32x4_s(f6, f6_19)
+ t1 = i64x2.extmul_low_i32x4_s(f_2, f7_19)
+ t2 = i64x2.extmul_low_i32x4_s(f_2, f8_19)
+ t3 = i64x2.extmul_low_i32x4_s(f_2, f9_19)
+
+ h2 = i64x2.add(h2, t0)
+ h3 = i64x2.add(h3, t1)
+ h4 = i64x2.add(h4, t2)
+ h5 = i64x2.add(h5, t3)
+
+ // f[7]
+ f_2 = i32x4.shl(f7, 1)
+ f_4 = i32x4.shl(f7, 2)
+
+ t0 = i64x2.extmul_low_i32x4_s(f_2, f7_19)
+ t1 = i64x2.extmul_low_i32x4_s(f_2, f8_19)
+ t2 = i64x2.extmul_low_i32x4_s(f_4, f9_19)
+
+ h4 = i64x2.add(h4, t0)
+ h5 = i64x2.add(h5, t1)
+ h6 = i64x2.add(h6, t2)
+
+ // f[8]
+ f_2 = i32x4.shl(f8, 1)
+
+ t0 = i64x2.extmul_low_i32x4_s(f8, f8_19)
+ t1 = i64x2.extmul_low_i32x4_s(f_2, f9_19)
+
+ h6 = i64x2.add(h6, t0)
+ h7 = i64x2.add(h7, t1)
+
+ // f[9]
+ f_2 = i32x4.shl(f9, 1)
+
+ t0 = i64x2.extmul_low_i32x4_s(f_2, f9_19)
+
+ h8 = i64x2.add(h8, t0)
+
+ const v24: v128 = i64x2.splat(1 << 24)
+ const v25: v128 = i64x2.splat(1 << 25)
+ let carry0: v128
+ let carry1: v128
+ let carry2: v128
+ let carry3: v128
+ let carry4: v128
+ let carry5: v128
+ let carry6: v128
+ let carry7: v128
+ let carry8: v128
+ let carry9: v128
+
+ carry0 = i64x2.shr_s(((i64x2.add(h0, v25))), 26)
+ h1 = i64x2.add(h1, carry0)
+ h0 = i64x2.sub(h0, i64x2.shl(carry0, 26))
+ carry4 = i64x2.shr_s(((i64x2.add(h4, v25))), 26)
+ h5 = i64x2.add(h5, carry4)
+ h4 = i64x2.sub(h4, i64x2.shl(carry4, 26))
+
+ carry1 = i64x2.shr_s(((i64x2.add(h1, v24))), 25)
+ h2 = i64x2.add(h2, carry1)
+ h1 = i64x2.sub(h1, i64x2.shl(carry1, 25))
+ carry5 = i64x2.shr_s(((i64x2.add(h5, v24))), 25)
+ h6 = i64x2.add(h6, carry5)
+ h5 = i64x2.sub(h5, i64x2.shl(carry5, 25))
+
+ carry2 = i64x2.shr_s(((i64x2.add(h2, v25))), 26)
+ h3 = i64x2.add(h3, carry2)
+ h2 = i64x2.sub(h2, i64x2.shl(carry2, 26))
+ carry6 = i64x2.shr_s(((i64x2.add(h6, v25))), 26)
+ h7 = i64x2.add(h7, carry6)
+ h6 = i64x2.sub(h6, i64x2.shl(carry6, 26))
+
+ carry3 = i64x2.shr_s(((i64x2.add(h3, v24))), 25)
+ h4 = i64x2.add(h4, carry3)
+ h3 = i64x2.sub(h3, i64x2.shl(carry3, 25))
+ carry7 = i64x2.shr_s(((i64x2.add(h7, v24))), 25)
+ h8 = i64x2.add(h8, carry7)
+ h7 = i64x2.sub(h7, i64x2.shl(carry7, 25))
+
+ carry4 = i64x2.shr_s(((i64x2.add(h4, v25))), 26)
+ h5 = i64x2.add(h5, carry4)
+ h4 = i64x2.sub(h4, i64x2.shl(carry4, 26))
+ carry8 = i64x2.shr_s(((i64x2.add(h8, v25))), 26)
+ h9 = i64x2.add(h9, carry8)
+ h8 = i64x2.sub(h8, i64x2.shl(carry8, 26))
+
+ carry9 = i64x2.shr_s(((i64x2.add(h9, v24))), 25)
+ h0 = i64x2.add(h0, i64x2.mul(carry9, i64x2.splat(19)))
+ h9 = i64x2.sub(h9, i64x2.shl(carry9, 25))
+
+ carry0 = i64x2.shr_s(((i64x2.add(h0, v25))), 26)
+ h1 = i64x2.add(h1, carry0)
+ h0 = i64x2.sub(h0, i64x2.shl(carry0, 26))
+
+ const hx_ptr: usize = changetype<usize>(hx)
+ store<i32>(hx_ptr, i64x2.extract_lane(h0, 0), 0)
+ store<i32>(hx_ptr, i64x2.extract_lane(h1, 0), 4)
+ store<i32>(hx_ptr, i64x2.extract_lane(h2, 0), 8)
+ store<i32>(hx_ptr, i64x2.extract_lane(h3, 0), 12)
+ store<i32>(hx_ptr, i64x2.extract_lane(h4, 0), 16)
+ store<i32>(hx_ptr, i64x2.extract_lane(h5, 0), 20)
+ store<i32>(hx_ptr, i64x2.extract_lane(h6, 0), 24)
+ store<i32>(hx_ptr, i64x2.extract_lane(h7, 0), 28)
+ store<i32>(hx_ptr, i64x2.extract_lane(h8, 0), 32)
+ store<i32>(hx_ptr, i64x2.extract_lane(h9, 0), 36)
+
+ const hy_ptr: usize = changetype<usize>(hy)
+ store<i32>(hy_ptr, i64x2.extract_lane(h0, 1), 0)
+ store<i32>(hy_ptr, i64x2.extract_lane(h1, 1), 4)
+ store<i32>(hy_ptr, i64x2.extract_lane(h2, 1), 8)
+ store<i32>(hy_ptr, i64x2.extract_lane(h3, 1), 12)
+ store<i32>(hy_ptr, i64x2.extract_lane(h4, 1), 16)
+ store<i32>(hy_ptr, i64x2.extract_lane(h5, 1), 20)
+ store<i32>(hy_ptr, i64x2.extract_lane(h6, 1), 24)
+ store<i32>(hy_ptr, i64x2.extract_lane(h7, 1), 28)
+ store<i32>(hy_ptr, i64x2.extract_lane(h8, 1), 32)
+ store<i32>(hy_ptr, i64x2.extract_lane(h9, 1), 36)
}
/**
* Square a FieldElement, double it, and store the result.
*
+ * h = 2 * f * f
+ * Can overlap h with f.
+ *
+ * Preconditions:
+ * |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
+ *
+ * Postconditions:
+ * |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
+ *
* @param h FieldElement power destination
* @param f FieldElement base source
*/
-//@ts-expect-error
-@inline
export function fe_sq2 (h: FieldElement, f: FieldElement): void {
- fe_mul(h, f, f)
- fe_add(h, h, h)
+ const f_ptr: usize = changetype<usize>(f)
+ // Widen the ten 25/26-bit limbs of f to i64 so limb products cannot overflow.
+ const f0: i64 = i64(load<i32>(f_ptr, 0))
+ const f1: i64 = i64(load<i32>(f_ptr, 4))
+ const f2: i64 = i64(load<i32>(f_ptr, 8))
+ const f3: i64 = i64(load<i32>(f_ptr, 12))
+ const f4: i64 = i64(load<i32>(f_ptr, 16))
+ const f5: i64 = i64(load<i32>(f_ptr, 20))
+ const f6: i64 = i64(load<i32>(f_ptr, 24))
+ const f7: i64 = i64(load<i32>(f_ptr, 28))
+ const f8: i64 = i64(load<i32>(f_ptr, 32))
+ const f9: i64 = i64(load<i32>(f_ptr, 36))
+
+ const f0_2: i64 = f0 * 2
+ const f1_2: i64 = f1 * 2
+ const f2_2: i64 = f2 * 2
+ const f3_2: i64 = f3 * 2
+ const f4_2: i64 = f4 * 2
+ const f5_2: i64 = f5 * 2
+ const f6_2: i64 = f6 * 2
+ const f7_2: i64 = f7 * 2
+
+ // Since p = 2^255 - 19, product terms that overflow past limb 9 wrap back
+ // into the low limbs with a factor of 19 (38 = 2*19), pre-applied here.
+ const f5_38: i64 = f5 * 38 /* 1.959375*2^30 */
+ const f6_19: i64 = f6 * 19 /* 1.959375*2^30 */
+ const f7_38: i64 = f7 * 38 /* 1.959375*2^30 */
+ const f8_19: i64 = f8 * 19 /* 1.959375*2^30 */
+ const f9_38: i64 = f9 * 38 /* 1.959375*2^30 */
+
+ const f0f0: i64 = f0 * f0
+ const f0f1_2: i64 = f0_2 * f1
+ const f0f2_2: i64 = f0_2 * f2
+ const f0f3_2: i64 = f0_2 * f3
+ const f0f4_2: i64 = f0_2 * f4
+ const f0f5_2: i64 = f0_2 * f5
+ const f0f6_2: i64 = f0_2 * f6
+ const f0f7_2: i64 = f0_2 * f7
+ const f0f8_2: i64 = f0_2 * f8
+ const f0f9_2: i64 = f0_2 * f9
+
+ const f1f1_2: i64 = f1_2 * f1
+ const f1f2_2: i64 = f1_2 * f2
+ const f1f3_4: i64 = f1_2 * f3_2
+ const f1f4_2: i64 = f1_2 * f4
+ const f1f5_4: i64 = f1_2 * f5_2
+ const f1f6_2: i64 = f1_2 * f6
+ const f1f7_4: i64 = f1_2 * f7_2
+ const f1f8_2: i64 = f1_2 * f8
+ const f1f9_76: i64 = f1_2 * f9_38
+
+ const f2f2: i64 = f2 * f2
+ const f2f3_2: i64 = f2_2 * f3
+ const f2f4_2: i64 = f2_2 * f4
+ const f2f5_2: i64 = f2_2 * f5
+ const f2f6_2: i64 = f2_2 * f6
+ const f2f7_2: i64 = f2_2 * f7
+ const f2f8_38: i64 = f2_2 * f8_19
+ const f2f9_38: i64 = f2 * f9_38
+
+ const f3f3_2: i64 = f3_2 * f3
+ const f3f4_2: i64 = f3_2 * f4
+ const f3f5_4: i64 = f3_2 * f5_2
+ const f3f6_2: i64 = f3_2 * f6
+ const f3f7_76: i64 = f3_2 * f7_38
+ const f3f8_38: i64 = f3_2 * f8_19
+ const f3f9_76: i64 = f3_2 * f9_38
+
+ const f4f4: i64 = f4 * f4
+ const f4f5_2: i64 = f4_2 * f5
+ const f4f6_38: i64 = f4_2 * f6_19
+ const f4f7_38: i64 = f4 * f7_38
+ const f4f8_38: i64 = f4_2 * f8_19
+ const f4f9_38: i64 = f4 * f9_38
+
+ const f5f5_38: i64 = f5 * f5_38
+ const f5f6_38: i64 = f5_2 * f6_19
+ const f5f7_76: i64 = f5_2 * f7_38
+ const f5f8_38: i64 = f5_2 * f8_19
+ const f5f9_76: i64 = f5_2 * f9_38
+
+ const f6f6_19: i64 = f6 * f6_19
+ const f6f7_38: i64 = f6 * f7_38
+ const f6f8_38: i64 = f6_2 * f8_19
+ const f6f9_38: i64 = f6 * f9_38
+
+ const f7f7_38: i64 = f7 * f7_38
+ const f7f8_38: i64 = f7_2 * f8_19
+ const f7f9_76: i64 = f7_2 * f9_38
+
+ const f8f8_19: i64 = f8 * f8_19
+ const f8f9_38: i64 = f8 * f9_38
+
+ const f9f9_38: i64 = f9 * f9_38
+
+ // Column sums for f*f: h_i gathers every product whose limb weights sum to
+ // i, with wrapped (>= 2^255) terms already scaled by 19 above.
+ let h0: i64 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
+ let h1: i64 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
+ let h2: i64 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
+ let h3: i64 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
+ let h4: i64 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
+ let h5: i64 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
+ let h6: i64 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
+ let h7: i64 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
+ let h8: i64 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
+ let h9: i64 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
+
+ // Final doubling: h = 2 * f * f.
+ h0 *= 2
+ h1 *= 2
+ h2 *= 2
+ h3 *= 2
+ h4 *= 2
+ h5 *= 2
+ h6 *= 2
+ h7 *= 2
+ h8 *= 2
+ h9 *= 2
+
+ let carry0: i64
+ let carry1: i64
+ let carry2: i64
+ let carry3: i64
+ let carry4: i64
+ let carry5: i64
+ let carry6: i64
+ let carry7: i64
+ let carry8: i64
+ let carry9: i64
+
+ // Carry chain: even limbs carry at 26 bits, odd limbs at 25 bits; the
+ // (1 << 25) / (1 << 24) bias makes the arithmetic shift round to nearest.
+ carry0 = (h0 + i64(1 << 25)) >> 26
+ h1 += carry0
+ h0 -= carry0 << 26
+ carry4 = (h4 + i64(1 << 25)) >> 26
+ h5 += carry4
+ h4 -= carry4 << 26
+
+ carry1 = (h1 + i64(1 << 24)) >> 25
+ h2 += carry1
+ h1 -= carry1 << 25
+ carry5 = (h5 + i64(1 << 24)) >> 25
+ h6 += carry5
+ h5 -= carry5 << 25
+
+ carry2 = (h2 + i64(1 << 25)) >> 26
+ h3 += carry2
+ h2 -= carry2 << 26
+ carry6 = (h6 + i64(1 << 25)) >> 26
+ h7 += carry6
+ h6 -= carry6 << 26
+
+ carry3 = (h3 + i64(1 << 24)) >> 25
+ h4 += carry3
+ h3 -= carry3 << 25
+ carry7 = (h7 + i64(1 << 24)) >> 25
+ h8 += carry7
+ h7 -= carry7 << 25
+
+ carry4 = (h4 + i64(1 << 25)) >> 26
+ h5 += carry4
+ h4 -= carry4 << 26
+ carry8 = (h8 + i64(1 << 25)) >> 26
+ h9 += carry8
+ h8 -= carry8 << 26
+
+ // The limb-9 carry wraps into limb 0 with the mod-p factor 19.
+ carry9 = (h9 + i64(1 << 24)) >> 25
+ h0 += carry9 * 19
+ h9 -= carry9 << 25
+
+ carry0 = (h0 + i64(1 << 25)) >> 26
+ h1 += carry0
+ h0 -= carry0 << 26
+
+ // Narrow back to the ten i32 limbs of the destination.
+ const h_ptr: usize = changetype<usize>(h)
+ store<i32>(h_ptr, h0, 0)
+ store<i32>(h_ptr, h1, 4)
+ store<i32>(h_ptr, h2, 8)
+ store<i32>(h_ptr, h3, 12)
+ store<i32>(h_ptr, h4, 16)
+ store<i32>(h_ptr, h5, 20)
+ store<i32>(h_ptr, h6, 24)
+ store<i32>(h_ptr, h7, 28)
+ store<i32>(h_ptr, h8, 32)
+ store<i32>(h_ptr, h9, 36)
}
/**
//@ts-expect-error
@inline
export function fe_sub (h: FieldElement, f: FieldElement, g: FieldElement): void {
- const o = changetype<usize>(h)
- const a = changetype<usize>(f)
- const b = changetype<usize>(g)
-
- let ai = v128.load(a)
- let bi = v128.load(b)
- v128.store(o, v128.sub<i32>(ai, bi))
-
- ai = v128.load(a, 16)
- bi = v128.load(b, 16)
- v128.store(o, v128.sub<i32>(ai, bi), 16)
-
- ai = i64x2(load<i64>(a + 32), 0)
- bi = i64x2(load<i64>(b + 32), 0)
- v128.store_lane<i64>(o, v128.sub<i32>(ai, bi), 0, 32)
+ const f_ptr = changetype<usize>(f)
+ const g_ptr = changetype<usize>(g)
+ const h_ptr = changetype<usize>(h)
+ // Limbs 0-7 as two full 128-bit lanewise i32 subtractions; limbs 8-9 via a
+ // single 8-byte lane store so the padding bytes past limb 9 are not written.
+ v128.store(h_ptr, v128.sub<i32>(v128.load(f_ptr, 0), v128.load(g_ptr, 0)), 0)
+ v128.store(h_ptr, v128.sub<i32>(v128.load(f_ptr, 16), v128.load(g_ptr, 16)), 16)
+ v128.store_lane<i64>(h_ptr, v128.sub<i32>(v128.load(f_ptr, 32), v128.load(g_ptr, 32)), 0, 32)
}
const fe_tobytes_t: FieldElement = fe()
export function fe_tobytes (s: StaticArray<u8>, h: FieldElement): void {
const t = fe_tobytes_t
fe_reduce(t, h)
- s[0] = u8(t[0] >> 0)
- s[1] = u8(t[0] >> 8)
- s[2] = u8(t[0] >> 16)
- s[3] = u8((t[0] >> 24) | (t[1] * (u32(1) << 2)))
- s[4] = u8(t[1] >> 6)
- s[5] = u8(t[1] >> 14)
- s[6] = u8((t[1] >> 22) | (t[2] * (u32(1) << 3)))
- s[7] = u8(t[2] >> 5)
- s[8] = u8(t[2] >> 13)
- s[9] = u8((t[2] >> 21) | (t[3] * (u32(1) << 5)))
- s[10] = u8(t[3] >> 3)
- s[11] = u8(t[3] >> 11)
- s[12] = u8((t[3] >> 19) | (t[4] * (u32(1) << 6)))
- s[13] = u8(t[4] >> 2)
- s[14] = u8(t[4] >> 10)
- s[15] = u8(t[4] >> 18)
- s[16] = u8(t[5] >> 0)
- s[17] = u8(t[5] >> 8)
- s[18] = u8(t[5] >> 16)
- s[19] = u8((t[5] >> 24) | (t[6] * (u32(1) << 1)))
- s[20] = u8(t[6] >> 7)
- s[21] = u8(t[6] >> 15)
- s[22] = u8((t[6] >> 23) | (t[7] * (u32(1) << 3)))
- s[23] = u8(t[7] >> 5)
- s[24] = u8(t[7] >> 13)
- s[25] = u8((t[7] >> 21) | (t[8] * (u32(1) << 4)))
- s[26] = u8(t[8] >> 4)
- s[27] = u8(t[8] >> 12)
- s[28] = u8((t[8] >> 20) | (t[9] * (u32(1) << 6)))
- s[29] = u8(t[9] >> 2)
- s[30] = u8(t[9] >> 10)
- s[31] = u8(t[9] >> 18)
-}
-
-/**
- * Internal function.
- *
- * Normalize results of `fe_multiply` and `fe_square` across limbs. This
- * requires a reduction step and two carry steps.
- *
- * // reduce
- * for (let i = 0; i < 15; i++) {
- * t[i] += 38 * t[i + 16]
- * }
- * // carry twice
- * for (let i = 0; i < 2; i++) {
- * for (let i = 0; i < 16; i++) {
- * c += t[i]
- * t[i] = c & 0xFFFF
- * c >>= 16
- * }
- * t[0] += 38 * c
- * }
-*/
-//@ts-expect-error
-@inline
-function normalize (o: usize, t: usize): void {
- // reduce from 20 limbs to 10
- let x = load<i64>(t)
- let y = load<i64>(t, 80)
- store<i64>(t, x + (19 * y))
-
- x = load<i64>(t, 8)
- y = load<i64>(t, 88)
- store<i64>(t, x + (19 * y), 8)
-
- x = load<i64>(t, 16)
- y = load<i64>(t, 96)
- store<i64>(t, x + (19 * y), 16)
-
- x = load<i64>(t, 24)
- y = load<i64>(t, 104)
- store<i64>(t, x + (19 * y), 24)
-
- x = load<i64>(t, 32)
- y = load<i64>(t, 112)
- store<i64>(t, x + (19 * y), 32)
-
- x = load<i64>(t, 40)
- y = load<i64>(t, 120)
- store<i64>(t, x + (19 * y), 40)
-
- x = load<i64>(t, 48)
- y = load<i64>(t, 128)
- store<i64>(t, x + (19 * y), 48)
-
- x = load<i64>(t, 56)
- y = load<i64>(t, 136)
- store<i64>(t, x + (19 * y), 56)
-
- x = load<i64>(t, 64)
- y = load<i64>(t, 144)
- store<i64>(t, x + (19 * y), 64)
-
- x = load<i64>(t, 72)
- y = load<i64>(t, 152)
- store<i64>(t, x + (19 * y), 72)
-
- // first carry
- let v: i64 = load<i64>(t, 0)
- let c: i64 = (v + (1 << 25)) >> 26
- store<i64>(t, v - (c << 26), 0)
-
- v = load<i64>(t, 8) + c
- c = (v + (1 << 24)) >> 25
- store<i64>(t, v - (c << 25), 8)
-
- v = load<i64>(t, 16) + c
- c = (v + (1 << 25)) >> 26
- store<i64>(t, v - (c << 26), 16)
-
- v = load<i64>(t, 24) + c
- c = (v + (1 << 24)) >> 25
- store<i64>(t, v - (c << 25), 24)
-
- v = load<i64>(t, 32) + c
- c = (v + (1 << 25)) >> 26
- store<i64>(t, v - (c << 26), 32)
-
- v = load<i64>(t, 40) + c
- c = (v + (1 << 24)) >> 25
- store<i64>(t, v - (c << 25), 40)
-
- v = load<i64>(t, 48) + c
- c = (v + (1 << 25)) >> 26
- store<i64>(t, v - (c << 26), 48)
-
- v = load<i64>(t, 56) + c
- c = (v + (1 << 24)) >> 25
- store<i64>(t, v - (c << 25), 56)
-
- v = load<i64>(t, 64) + c
- c = (v + (1 << 25)) >> 26
- store<i64>(t, v - (c << 26), 64)
-
- v = load<i64>(t, 72) + c
- c = (v + (1 << 24)) >> 25
- store<i64>(t, v - (c << 25), 72)
-
- // fold final carry back to start then do second carry and assign to output
- v = load<i64>(t, 0) + (19 * c)
- c = (v + (1 << 25)) >> 26
- store<i32>(o, v - (c << 26), 0)
-
- v = load<i64>(t, 8) + c
- c = (v + (1 << 24)) >> 25
- store<i32>(o, v - (c << 25), 4)
-
- v = load<i64>(t, 16) + c
- c = (v + (1 << 25)) >> 26
- store<i32>(o, v - (c << 26), 8)
-
- v = load<i64>(t, 24) + c
- c = (v + (1 << 24)) >> 25
- store<i32>(o, v - (c << 25), 12)
-
- v = load<i64>(t, 32) + c
- c = (v + (1 << 25)) >> 26
- store<i32>(o, v - (c << 26), 16)
-
- v = load<i64>(t, 40) + c
- c = (v + (1 << 24)) >> 25
- store<i32>(o, v - (c << 25), 20)
-
- v = load<i64>(t, 48) + c
- c = (v + (1 << 25)) >> 26
- store<i32>(o, v - (c << 26), 24)
-
- v = load<i64>(t, 56) + c
- c = (v + (1 << 24)) >> 25
- store<i32>(o, v - (c << 25), 28)
-
- v = load<i64>(t, 64) + c
- c = (v + (1 << 25)) >> 26
- store<i32>(o, v - (c << 26), 32)
-
- v = load<i64>(t, 72) + c
- c = (v + (1 << 24)) >> 25
- store<i32>(o, v - (c << 25), 36)
+ // Load the ten reduced limbs; even limbs carry 26 bits, odd limbs 25 bits.
+ const t_ptr: usize = changetype<usize>(t)
+ const t0 = load<i32>(t_ptr, 0)
+ const t1 = load<i32>(t_ptr, 4)
+ const t2 = load<i32>(t_ptr, 8)
+ const t3 = load<i32>(t_ptr, 12)
+ const t4 = load<i32>(t_ptr, 16)
+ const t5 = load<i32>(t_ptr, 20)
+ const t6 = load<i32>(t_ptr, 24)
+ const t7 = load<i32>(t_ptr, 28)
+ const t8 = load<i32>(t_ptr, 32)
+ const t9 = load<i32>(t_ptr, 36)
+
+ // Pack the 255-bit value little-endian into 32 bytes; bytes on limb
+ // boundaries splice the high bits of one limb with the low bits of the next.
+ const s_ptr: usize = changetype<usize>(s)
+ store<u8>(s_ptr, t0 >> 0, 0)
+ store<u8>(s_ptr, t0 >> 8, 1)
+ store<u8>(s_ptr, t0 >> 16, 2)
+ store<u8>(s_ptr, (t0 >> 24) | (t1 << 2), 3)
+ store<u8>(s_ptr, t1 >> 6, 4)
+ store<u8>(s_ptr, t1 >> 14, 5)
+ store<u8>(s_ptr, (t1 >> 22) | (t2 << 3), 6)
+ store<u8>(s_ptr, t2 >> 5, 7)
+ store<u8>(s_ptr, t2 >> 13, 8)
+ store<u8>(s_ptr, (t2 >> 21) | (t3 << 5), 9)
+ store<u8>(s_ptr, t3 >> 3, 10)
+ store<u8>(s_ptr, t3 >> 11, 11)
+ store<u8>(s_ptr, (t3 >> 19) | (t4 << 6), 12)
+ store<u8>(s_ptr, t4 >> 2, 13)
+ store<u8>(s_ptr, t4 >> 10, 14)
+ store<u8>(s_ptr, t4 >> 18, 15)
+ store<u8>(s_ptr, t5 >> 0, 16)
+ store<u8>(s_ptr, t5 >> 8, 17)
+ store<u8>(s_ptr, t5 >> 16, 18)
+ store<u8>(s_ptr, (t5 >> 24) | (t6 << 1), 19)
+ store<u8>(s_ptr, t6 >> 7, 20)
+ store<u8>(s_ptr, t6 >> 15, 21)
+ store<u8>(s_ptr, (t6 >> 23) | (t7 << 3), 22)
+ store<u8>(s_ptr, t7 >> 5, 23)
+ store<u8>(s_ptr, t7 >> 13, 24)
+ store<u8>(s_ptr, (t7 >> 21) | (t8 << 4), 25)
+ store<u8>(s_ptr, t8 >> 4, 26)
+ store<u8>(s_ptr, t8 >> 12, 27)
+ store<u8>(s_ptr, (t8 >> 20) | (t9 << 6), 28)
+ store<u8>(s_ptr, t9 >> 2, 29)
+ store<u8>(s_ptr, t9 >> 10, 30)
+ store<u8>(s_ptr, t9 >> 18, 31)
}
* - ge_precomp (Duif): (y+x,y-x,2dxy)
*/
import { base, base2 } from './base'
-import { ed25519_d, ed25519_d2, fe25519_sqrtm1 } from './constants'
-import { fe, fe_0, fe_1, fe_add, fe_cmov, fe_copy, fe_frombytes, fe_invert, fe_isnegative, fe_iszero, fe_mul, fe_neg, fe_pow22523, fe_sq, fe_sq2, fe_sub, fe_tobytes, FieldElement } from './fe'
+import { ed25519_d, fe25519_sqrtm1 } from './constants'
+import { fe, fe_1, fe_add, fe_cmov, fe_copy, fe_frombytes, fe_isnegative, fe_iszero, fe_mul, fe_neg, fe_pow22523, fe_sq, fe_sub, FieldElement } from './fe'
+import { ge_add_cached, ge_add_precomp, ge_cached, ge_p1p1, ge_p1p1_to_p2, ge_p1p1_to_p3, ge_p2, ge_p2_0, ge_p2_dbl, ge_p2_to_p3, ge_p3, ge_p3_0, ge_p3_dbl, ge_p3_to_cached, ge_p3_tobytes, ge_precomp, ge_precomp_0, ge_sub_cached, ge_sub_precomp } from './p'
import { equal, negative, optblocker_u8 } from './utils'
-export class ge_p2 {
- X: FieldElement = fe()
- Y: FieldElement = fe()
- Z: FieldElement = fe()
-}
-
-export class ge_p3 extends ge_p2 {
- T: FieldElement = fe()
- __brand: 'ge_p3' = 'ge_p3'
-}
-
-export class ge_p1p1 extends ge_p2 {
- T: FieldElement = fe()
- __brand: 'ge_p1p1' = 'ge_p1p1'
-}
-
-export class ge_precomp {
- yplusx: FieldElement = fe()
- yminusx: FieldElement = fe()
- xy2d: FieldElement = fe()
-}
-
-export class ge_cached {
- YplusX: FieldElement = fe()
- YminusX: FieldElement = fe()
- Z: FieldElement = fe()
- T2d: FieldElement = fe()
-}
-
-//@ts-expect-error
-@inline
-function ge_cached_8 (): StaticArray<ge_cached> {
- return StaticArray.fromArray<ge_cached>([
- new ge_cached(),
- new ge_cached(),
- new ge_cached(),
- new ge_cached(),
- new ge_cached(),
- new ge_cached(),
- new ge_cached(),
- new ge_cached()
- ])
-}
-
-//@ts-expect-error
-@inline
-export function ge_p2_0 (h: ge_p2): void {
- fe_0(h.X)
- fe_1(h.Y)
- fe_1(h.Z)
-}
-
-//@ts-expect-error
-@inline
-export function ge_p3_0 (h: ge_p3): void {
- ge_p2_0(h)
- fe_0(h.T)
-}
-
//@ts-expect-error
@inline
-export function ge_precomp_0 (h: ge_precomp): void {
- fe_1(h.yplusx)
- fe_1(h.yminusx)
- fe_0(h.xy2d)
-}
-
function ge_cmov (t: ge_precomp, u: ge_precomp, b: u8): void {
fe_cmov(t.yplusx, u.yplusx, b)
fe_cmov(t.yminusx, u.yminusx, b)
ge_cmov(t, minust, bnegative)
}
+// t = entry selected by signed digit b from row pos of the precomputed
+// base-point table (delegates to ge_cmov8).
+//@ts-expect-error
+@inline
function ge_cmov8_base (t: ge_precomp, pos: i32, b: i8): void {
ge_cmov8(t, base[pos], b)
}
-const ge_add_precomp_t0: FieldElement = fe()
-/**
- * r = p + q
- */
-function ge_add_precomp (r: ge_p1p1, p: ge_p3, q: ge_precomp): void {
- const t0 = ge_add_precomp_t0
- fe_add(r.X, p.Y, p.X)
- fe_sub(r.Y, p.Y, p.X)
- fe_mul(r.Z, r.X, q.yplusx)
- fe_mul(r.Y, r.Y, q.yminusx)
- fe_mul(r.T, q.xy2d, p.T)
- fe_add(t0, p.Z, p.Z)
- fe_sub(r.X, r.Z, r.Y)
- fe_add(r.Y, r.Z, r.Y)
- fe_add(r.Z, t0, r.T)
- fe_sub(r.T, t0, r.T)
-}
-
-function ge_p1p1_to_p3 (r: ge_p3, p: ge_p1p1): void {
- fe_mul(r.X, p.X, p.T)
- fe_mul(r.Y, p.Y, p.Z)
- fe_mul(r.Z, p.Z, p.T)
- fe_mul(r.T, p.X, p.Y)
-}
-
-function ge_p3_to_p2 (r: ge_p2, p: ge_p3): void {
- fe_copy(r.X, p.X)
- fe_copy(r.Y, p.Y)
- fe_copy(r.Z, p.Z)
-}
-
-const ge_p2_dbl_t0: FieldElement = fe()
-function ge_p2_dbl (r: ge_p1p1, p: ge_p2): void {
- const t0 = ge_p2_dbl_t0
- fe_sq(r.X, p.X)
- fe_sq(r.Z, p.Y)
- fe_sq2(r.T, p.Z)
- fe_add(r.Y, p.X, p.Y)
- fe_sq(t0, r.Y)
- fe_add(r.Y, r.Z, r.X)
- fe_sub(r.Z, r.Z, r.X)
- fe_sub(r.X, t0, r.Y)
- fe_sub(r.T, r.T, r.Z)
-}
-
-const ge_p3_dbl_q: ge_p2 = new ge_p2()
-/**
- * r = 2 * p
- */
-function ge_p3_dbl (r: ge_p1p1, p: ge_p3): void {
- const q = ge_p3_dbl_q
- ge_p3_to_p2(q, p)
- ge_p2_dbl(r, q)
-}
-
-/**
- * r = p
- */
-function ge_p1p1_to_p2 (r: ge_p2, p: ge_p1p1): void {
- fe_mul(r.X, p.X, p.T)
- fe_mul(r.Y, p.Y, p.Z)
- fe_mul(r.Z, p.Z, p.T)
-}
-
-/**
- * r = p
- */
-function ge_p3_to_cached (r: ge_cached, p: ge_p3): void {
- fe_add(r.YplusX, p.Y, p.X)
- fe_sub(r.YminusX, p.Y, p.X)
- fe_copy(r.Z, p.Z)
- fe_mul(r.T2d, p.T, ed25519_d2)
-}
-
-const ge_add_cached_t0: FieldElement = fe()
-/**
- * r = p + q
- */
-function ge_add_cached (r: ge_p1p1, p: ge_p3, q: ge_cached): void {
- const t0 = ge_add_cached_t0
- fe_add(r.X, p.Y, p.X)
- fe_sub(r.Y, p.Y, p.X)
- fe_mul(r.Z, r.X, q.YplusX)
- fe_mul(r.Y, r.Y, q.YminusX)
- fe_mul(r.T, q.T2d, p.T)
- fe_mul(r.X, p.Z, q.Z)
- fe_add(t0, r.X, r.X)
- fe_sub(r.X, r.Z, r.Y)
- fe_add(r.Y, r.Z, r.Y)
- fe_add(r.Z, t0, r.T)
- fe_sub(r.T, t0, r.T)
-}
-
-function ge_cached_0 (h: ge_cached): void {
- fe_1(h.YplusX)
- fe_1(h.YminusX)
- fe_1(h.Z)
- fe_0(h.T2d)
-}
-
-function ge_cmov_cached (t: ge_cached, u: ge_cached, b: u8): void {
- fe_cmov(t.YplusX, u.YplusX, b)
- fe_cmov(t.YminusX, u.YminusX, b)
- fe_cmov(t.Z, u.Z, b)
- fe_cmov(t.T2d, u.T2d, b)
-}
-
-const ge_cmov8_cached_minust: ge_cached = new ge_cached()
-function ge_cmov8_cached (t: ge_cached, cached: StaticArray<ge_cached>, b: i8): void {
- const minust = ge_cmov8_cached_minust
- const bnegative: u8 = negative(b)
- const babs: u8 = b - (((-bnegative) & b) << 1)
-
- ge_cached_0(t)
- ge_cmov_cached(t, cached[0], equal(babs, 1))
- ge_cmov_cached(t, cached[1], equal(babs, 2))
- ge_cmov_cached(t, cached[2], equal(babs, 3))
- ge_cmov_cached(t, cached[3], equal(babs, 4))
- ge_cmov_cached(t, cached[4], equal(babs, 5))
- ge_cmov_cached(t, cached[5], equal(babs, 6))
- ge_cmov_cached(t, cached[6], equal(babs, 7))
- ge_cmov_cached(t, cached[7], equal(babs, 8))
- fe_copy(minust.YplusX, t.YminusX)
- fe_copy(minust.YminusX, t.YplusX)
- fe_copy(minust.Z, t.Z)
- fe_neg(minust.T2d, t.T2d)
- ge_cmov_cached(t, minust, bnegative)
-}
-
-const ge_scalarmult_e: StaticArray<i8> = new StaticArray<i8>(64)
-const ge_scalarmult_r: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_s: ge_p2 = new ge_p2()
-const ge_scalarmult_t2: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_t3: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_t4: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_t5: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_t6: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_t7: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_t8: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_p2: ge_p3 = new ge_p3()
-const ge_scalarmult_p3: ge_p3 = new ge_p3()
-const ge_scalarmult_p4: ge_p3 = new ge_p3()
-const ge_scalarmult_p5: ge_p3 = new ge_p3()
-const ge_scalarmult_p6: ge_p3 = new ge_p3()
-const ge_scalarmult_p7: ge_p3 = new ge_p3()
-const ge_scalarmult_p8: ge_p3 = new ge_p3()
-const ge_scalarmult_pi: StaticArray<ge_cached> = ge_cached_8()
-const ge_scalarmult_t: ge_cached = new ge_cached()
-/**
- * `h = a * p`
- *
- * where
- *
- * `a = 256⁰a[0] + 256¹a[1] + ... + 256³¹a[31]`
- *
- * Preconditions:
- * - `a[31] <= 127`
- * - `p` is public
- */
-export function ge_scalarmult (h: ge_p3, a: StaticArray<u8>, p: ge_p3): void {
- const e = ge_scalarmult_e
- const r = ge_scalarmult_r
- const s = ge_scalarmult_s
- const t2 = ge_scalarmult_t2
- const t3 = ge_scalarmult_t3
- const t4 = ge_scalarmult_t4
- const t5 = ge_scalarmult_t5
- const t6 = ge_scalarmult_t6
- const t7 = ge_scalarmult_t7
- const t8 = ge_scalarmult_t8
- const p2 = ge_scalarmult_p2
- const p3 = ge_scalarmult_p3
- const p4 = ge_scalarmult_p4
- const p5 = ge_scalarmult_p5
- const p6 = ge_scalarmult_p6
- const p7 = ge_scalarmult_p7
- const p8 = ge_scalarmult_p8
- const pi = ge_scalarmult_pi
- const t = ge_scalarmult_t
- let carry: i8
- let i: i8
-
- ge_p3_to_cached(pi[1 - 1], p) /* p */
-
- ge_p3_dbl(t2, p)
- ge_p1p1_to_p3(p2, t2)
- ge_p3_to_cached(pi[2 - 1], p2) /* 2p = 2*p */
-
- ge_add_cached(t3, p, pi[2 - 1])
- ge_p1p1_to_p3(p3, t3)
- ge_p3_to_cached(pi[3 - 1], p3) /* 3p = 2p+p */
-
- ge_p3_dbl(t4, p2)
- ge_p1p1_to_p3(p4, t4)
- ge_p3_to_cached(pi[4 - 1], p4) /* 4p = 2*2p */
-
- ge_add_cached(t5, p, pi[4 - 1])
- ge_p1p1_to_p3(p5, t5)
- ge_p3_to_cached(pi[5 - 1], p5) /* 5p = 4p+p */
-
- ge_p3_dbl(t6, p3)
- ge_p1p1_to_p3(p6, t6)
- ge_p3_to_cached(pi[6 - 1], p6) /* 6p = 2*3p */
-
- ge_add_cached(t7, p, pi[6 - 1])
- ge_p1p1_to_p3(p7, t7)
- ge_p3_to_cached(pi[7 - 1], p7) /* 7p = 6p+p */
-
- ge_p3_dbl(t8, p4)
- ge_p1p1_to_p3(p8, t8)
- ge_p3_to_cached(pi[8 - 1], p8) /* 8p = 2*4p */
-
- for (i = 0; i < 32; ++i) {
- e[2 * i + 0] = (a[i] >> 0) & 15
- e[2 * i + 1] = (a[i] >> 4) & 15
- }
- /* each e[i] is between 0 and 15 */
- /* e[63] is between 0 and 7 */
-
- carry = 0
- for (i = 0; i < 63; ++i) {
- e[i] += carry
- carry = e[i] + 8
- carry >>= 4
- e[i] -= i8(carry << 4)
- }
- e[63] += carry
- /* each e[i] is between -8 and 8 */
-
- ge_p3_0(h)
-
- for (i = 63; i != 0; i--) {
- ge_cmov8_cached(t, pi, e[i])
- ge_add_cached(r, h, t)
-
- ge_p1p1_to_p2(s, r)
- ge_p2_dbl(r, s)
- ge_p1p1_to_p2(s, r)
- ge_p2_dbl(r, s)
- ge_p1p1_to_p2(s, r)
- ge_p2_dbl(r, s)
- ge_p1p1_to_p2(s, r)
- ge_p2_dbl(r, s)
-
- ge_p1p1_to_p3(h, r) /* *16 */
- }
- ge_cmov8_cached(t, pi, e[i])
- ge_add_cached(r, h, t)
-
- ge_p1p1_to_p3(h, r)
-}
-
-const ge_scalarmult_base_e: StaticArray<i8> = new StaticArray<i8>(64)
-const ge_scalarmult_base_r: ge_p1p1 = new ge_p1p1()
-const ge_scalarmult_base_s: ge_p2 = new ge_p2()
-const ge_scalarmult_base_t: ge_precomp = new ge_precomp()
-/**
- * `h = a * B` (with precomputation)
- *
- * where
- *
- * `a = 256⁰a[0] + 256¹a[1] + ... + 256³¹a[31]`
- *
- * B is the Ed25519 base point `(x,⅘)` with `x` positive. As bytes:
- *
- * `0x5866666666666666666666666666666666666666666666666666666666666666`
- *
- * Preconditions:
- * - `a[31] <= 127`
- */
-export function ge_scalarmult_base (h: ge_p3, a: StaticArray<u8>): void {
- const e = ge_scalarmult_base_e
- const r = ge_scalarmult_base_r
- const s = ge_scalarmult_base_s
- const t = ge_scalarmult_base_t
-
- for (let i = 0; i < 32; ++i) {
- e[(i << 1) + 0] = (a[i] >> 0) & 15
- e[(i << 1) + 1] = (a[i] >> 4) & 15
- }
- /* each e[i] is between 0 and 15 */
- /* e[63] is between 0 and 7 */
-
- let carry: i8 = 0
- for (let i = 0; i < 63; ++i) {
- e[i] += carry
- carry = e[i] + 8
- carry >>= 4
- e[i] -= carry * (i8(1) << 4)
- }
- e[63] += carry
- /* each e[i] is between -8 and 8 */
-
- ge_p3_0(h)
-
- for (let i = 1; i < 64; i += 2) {
- ge_cmov8_base(t, i / 2, e[i])
- ge_add_precomp(r, h, t)
- ge_p1p1_to_p3(h, r)
- }
-
- ge_p3_dbl(r, h)
- ge_p1p1_to_p2(s, r)
- ge_p2_dbl(r, s)
- ge_p1p1_to_p2(s, r)
- ge_p2_dbl(r, s)
- ge_p1p1_to_p2(s, r)
- ge_p2_dbl(r, s)
- ge_p1p1_to_p3(h, r)
-
- for (let i = 0; i < 64; i += 2) {
- ge_cmov8_base(t, i / 2, e[i])
- ge_add_precomp(r, h, t)
- ge_p1p1_to_p3(h, r)
- }
-}
-
-const ge_p3_tobytes_recip: FieldElement = fe()
-const ge_p3_tobytes_x: FieldElement = fe()
-const ge_p3_tobytes_y: FieldElement = fe()
-export function ge_p3_tobytes (s: StaticArray<u8>, h: ge_p3): void {
- const recip = ge_p3_tobytes_recip
- const x = ge_p3_tobytes_x
- const y = ge_p3_tobytes_y
- fe_invert(recip, h.Z)
- fe_mul(x, h.X, recip)
- fe_mul(y, h.Y, recip)
- fe_tobytes(s, y)
- s[31] ^= fe_isnegative(x) << 7
-}
-
-/**
- * true if `s < p`
- */
-export function ge_is_canonical (s: StaticArray<u8>): u8 {
- let c: u8 = (s[31] & 0x7f) ^ 0x7f
- for (let i = 30; i > 0; i--) {
- c |= s[i] ^ 0xff
- }
- c = (c - 1) >> 8
- const d: u8 = (0xec - s[0]) >> 8
- return 1 - (c & d & 1)
-}
-
const u: FieldElement = fe()
const v: FieldElement = fe()
const v3: FieldElement = fe()
fe_mul(h.X, h.X, fe25519_sqrtm1)
}
- if (fe_isnegative(h.X) == (s[31] >> 7)) { /* vartime function - compiler optimization is fine */
+ if (fe_isnegative(h.X) == (load<u8>(changetype<usize>(s), 31) >> 7)) { /* vartime function - compiler optimization is fine */
fe_neg(h.X, h.X)
}
fe_mul(h.T, h.X, h.Y)
}
/**
- * r = p
+ * true if `s < p`
 */
-export function ge_p2_to_p3 (r: ge_p3, p: ge_p2): void {
- fe_copy(r.X, p.X)
- fe_copy(r.Y, p.Y)
- fe_copy(r.Z, p.Z)
- fe_mul(r.T, p.X, p.Y)
+export function ge_is_canonical (s: StaticArray<u8>): u8 {
+ // p = 2^255 - 19; little-endian bytes: s[0] = 0xed, s[1..30] = 0xff, s[31] = 0x7f.
+ let c: u8 = (s[31] & 0x7f) ^ 0x7f
+ for (let i = 30; i > 0; i--) {
+ c |= s[i] ^ 0xff
+ }
+ // c becomes 0xff iff (s[31] & 0x7f) == 0x7f and s[1..30] are all 0xff.
+ c = (c - 1) >> 8
+ // d becomes 0xff iff s[0] > 0xec, i.e. s[0] >= 0xed.
+ const d: u8 = (0xec - s[0]) >> 8
+ // Non-canonical (s >= p, top bit ignored) exactly when both flags are set.
+ return 1 - (c & d & 1)
}
-const ge_p3_sub_q_neg: ge_p3 = new ge_p3()
-/* r = p-q */
-export function ge_p3_sub (r: ge_p3, p: ge_p3, q: ge_p3): void {
- const q_neg = ge_p3_sub_q_neg
- ge_p3_neg(q_neg, q)
- ge_p3_add(r, p, q_neg)
-}
+// Module-level scratch (avoids per-call allocation; not re-entrant).
+const ge_scalarmult_base_e: StaticArray<i8> = new StaticArray<i8>(64)
+const ge_scalarmult_base_r: ge_p1p1 = new ge_p1p1()
+const ge_scalarmult_base_s: ge_p2 = new ge_p2()
+const ge_scalarmult_base_t: ge_precomp = new ge_precomp()
+/**
+ * `h = a * B` (with precomputation)
+ *
+ * where
+ *
+ * `a = 256⁰a[0] + 256¹a[1] + ... + 256³¹a[31]`
+ *
+ * B is the Ed25519 base point `(x,⅘)` with `x` positive. As bytes:
+ *
+ * `0x5866666666666666666666666666666666666666666666666666666666666666`
+ *
+ * Preconditions:
+ * - `a[31] <= 127`
+ */
+function ge_scalarmult_base (h: ge_p3, a: StaticArray<u8>): void {
+ const e = ge_scalarmult_base_e
+ const r = ge_scalarmult_base_r
+ const s = ge_scalarmult_base_s
+ const t = ge_scalarmult_base_t
+
+ // Split the scalar into 64 radix-16 nibbles, low nibble first.
+ for (let i = 0; i < 32; ++i) {
+ e[(i << 1) + 0] = (a[i] >> 0) & 15
+ e[(i << 1) + 1] = (a[i] >> 4) & 15
+ }
+ /* each e[i] is between 0 and 15 */
+ /* e[63] is between 0 and 7 */
+
+ // Recode to signed digits so each lookup index fits the 8-entry table.
+ let carry: i8 = 0
+ for (let i = 0; i < 63; ++i) {
+ e[i] += carry
+ carry = e[i] + 8
+ carry >>= 4
+ e[i] -= carry * (i8(1) << 4)
+ }
+ e[63] += carry
+ /* each e[i] is between -8 and 8 */
+
+ ge_p3_0(h)
+
+ // Accumulate the odd radix-16 digits first.
+ for (let i = 1; i < 64; i += 2) {
+ ge_cmov8_base(t, i / 2, e[i])
+ ge_add_precomp(r, h, t)
+ ge_p1p1_to_p3(h, r)
+ }
+
+ // h *= 16 (four doublings) to shift the odd digits into position.
+ ge_p3_dbl(r, h)
+ ge_p1p1_to_p2(s, r)
+ ge_p2_dbl(r, s)
+ ge_p1p1_to_p2(s, r)
+ ge_p2_dbl(r, s)
+ ge_p1p1_to_p2(s, r)
+ ge_p2_dbl(r, s)
+ ge_p1p1_to_p3(h, r)
-/* r = -p */
-function ge_p3_neg (r: ge_p3, p: ge_p3): void {
- fe_neg(r.X, p.X)
- fe_copy(r.Y, p.Y)
- fe_copy(r.Z, p.Z)
- fe_neg(r.T, p.T)
+ // Then accumulate the even digits.
+ for (let i = 0; i < 64; i += 2) {
+ ge_cmov8_base(t, i / 2, e[i])
+ ge_add_precomp(r, h, t)
+ ge_p1p1_to_p3(h, r)
+ }
}
-const ge_p3_add_q_cached = new ge_cached()
-const ge_p3_add_q_p1p1 = new ge_p1p1()
-/* r = p+q */
-function ge_p3_add (r: ge_p3, p: ge_p3, q: ge_p3): void {
- const q_cached = ge_p3_add_q_cached
- const p1p1 = ge_p3_add_q_p1p1
- ge_p3_to_cached(q_cached, q)
- ge_add_cached(p1p1, p, q_cached)
- ge_p1p1_to_p3(r, p1p1)
+const ge_scalarmult_base_tobytes_h: ge_p3 = new ge_p3()
+/** s = bytes(a * B): fixed-base scalar multiplication packed straight to bytes. */
+export function ge_scalarmult_base_tobytes (s: StaticArray<u8>, a: StaticArray<u8>): void {
+ const h = ge_scalarmult_base_tobytes_h
+ ge_scalarmult_base(h, a)
+ ge_p3_tobytes(s, h)
}
const ge_double_scalarmult_vartime_aslide: StaticArray<i8> = new StaticArray<i8>(256)
const ge_double_scalarmult_vartime_bslide: StaticArray<i8> = new StaticArray<i8>(256)
-const ge_double_scalarmult_vartime_Ai: StaticArray<ge_cached> = ge_cached_8() /* A,3A,5A,7A,9A,11A,13A,15A */
+const ge_double_scalarmult_vartime_Ai: StaticArray<ge_cached> = StaticArray.fromArray<ge_cached>([
+ new ge_cached(),
+ new ge_cached(),
+ new ge_cached(),
+ new ge_cached(),
+ new ge_cached(),
+ new ge_cached(),
+ new ge_cached(),
+ new ge_cached()
+]) /* A,3A,5A,7A,9A,11A,13A,15A */
const ge_double_scalarmult_vartime_t: ge_p1p1 = new ge_p1p1()
const ge_double_scalarmult_vartime_u: ge_p3 = new ge_p3()
const ge_double_scalarmult_vartime_A2: ge_p3 = new ge_p3()
*
* Only used for signatures verification.
*/
-export function ge_double_scalarmult_vartime (r: ge_p2, a: StaticArray<u8>, A: ge_p3, b: StaticArray<u8>): void {
+function ge_double_scalarmult_vartime (r: ge_p2, a: StaticArray<u8>, A: ge_p3, b: StaticArray<u8>): void {
const Bi = base2
const aslide = ge_double_scalarmult_vartime_aslide
const bslide = ge_double_scalarmult_vartime_bslide
}
}
+const ge_double_scalarmult_vartime_p: ge_p2 = new ge_p2()
+/** r = a*A + b*B in extended (p3) coordinates. Variable-time; verification only. */
+export function ge_double_scalarmult_vartime_to_p3 (r: ge_p3, a: StaticArray<u8>, A: ge_p3, b: StaticArray<u8>): void {
+ const p = ge_double_scalarmult_vartime_p
+ ge_double_scalarmult_vartime(p, a, A, b)
+ ge_p2_to_p3(r, p)
+}
+
function slide_vartime (r: StaticArray<i8>, a: StaticArray<u8>): void {
let i: i32
let b: i32
}
}
}
-
-const ge_sub_cached_t0: FieldElement = fe()
-/**
- * r = p - q
- */
-function ge_sub_cached (r: ge_p1p1, p: ge_p3, q: ge_cached): void {
- const t0 = ge_sub_cached_t0
- fe_add(r.X, p.Y, p.X)
- fe_sub(r.Y, p.Y, p.X)
- fe_mul(r.Z, r.X, q.YminusX)
- fe_mul(r.Y, r.Y, q.YplusX)
- fe_mul(r.T, q.T2d, p.T)
- fe_mul(r.X, p.Z, q.Z)
- fe_add(t0, r.X, r.X)
- fe_sub(r.X, r.Z, r.Y)
- fe_add(r.Y, r.Z, r.Y)
- fe_sub(r.Z, t0, r.T)
- fe_add(r.T, t0, r.T)
-}
-
-const ge_sub_precomp_t0: FieldElement = fe()
-/**
- * r = p - q
- */
-function ge_sub_precomp (r: ge_p1p1, p: ge_p3, q: ge_precomp): void {
- const t0 = ge_sub_precomp_t0
- fe_add(r.X, p.Y, p.X)
- fe_sub(r.Y, p.Y, p.X)
- fe_mul(r.Z, r.X, q.yminusx)
- fe_mul(r.Y, r.Y, q.yplusx)
- fe_mul(r.T, q.xy2d, p.T)
- fe_add(t0, p.Z, p.Z)
- fe_sub(r.X, r.Z, r.Y)
- fe_add(r.Y, r.Z, r.Y)
- fe_sub(r.Z, t0, r.T)
- fe_add(r.T, t0, r.T)
-}