From 6e90a13f10c93923af8c62fe3a57f9daf8c6ecff Mon Sep 17 00:00:00 2001
From: Loup Vaillant <loup@loup-vaillant.fr>
Date: Wed, 8 Aug 2018 23:24:25 +0200
Subject: [PATCH] Signed comb with unsigned table

Or, bitwiseshiftleft saves the day.  The current code is hacky as hell,
but it works, and it cleared up my confusion.  Turns out a suitable
signed comb is quite different from an unsigned one: the table itself
should represent -1 and 1 bits, instead of 0 and 1 bits.

Right now the same effect is achieved with 2 additions (more precisely,
an addition and a subtraction).  With the proper table, it can be one
operation.
---
 src/monocypher.c | 103 +++++++++++++----------------------------------
 1 file changed, 27 insertions(+), 76 deletions(-)

diff --git a/src/monocypher.c b/src/monocypher.c
index 9b01366..0e97cfb 100644
--- a/src/monocypher.c
+++ b/src/monocypher.c
@@ -1836,7 +1836,6 @@ static void ge_msub(ge *s, const ge *p, const fe yp, const fe ym, const fe t2,
     fe_neg(nt2, t2);
     ge_madd(s, p, ym, yp, nt2, a, b);
 }
-#include <stdio.h>
 
 static void ge_scalarmult_base(ge *p, const u8 scalar[32])
 {
@@ -1848,53 +1847,22 @@ static void ge_scalarmult_base(ge *p, const u8 scalar[32])
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
     };
-    static const u8 ones[32] = {
-        255, 255, 255, 255, 255, 255, 255, 255,
-        255, 255, 255, 255, 255, 255, 255, 255,
-        255, 255, 255, 255, 255, 255, 255, 255,
-        255, 255, 255, 255, 255, 255, 255, 127, // only over 255 bits
-    };
-    i64 s[64] = {0};
-    FOR (i,  0, 32) {
-        FOR (j, 0, 32) {
-            s[i+j] += half_mod_L[i] * ((u64) scalar[j] + ones[j]);
-        }
-    }
-    u8 s_scalar[32]; // for each bit: 1 means 1, 0 means -1
-    modL(s_scalar, s);
-
-    // Transform the scalar into all bit set form
-    // Method 2: ((2^255 - 1) * (1/2)) +  (scalar * (1/2))
     static const u8 half_ones[32] = {
         0x42, 0x9a, 0xa3, 0xba, 0x23, 0xa5, 0xbf, 0xcb,
         0x11, 0x5b, 0x9d, 0xc5, 0x74, 0x95, 0xf3, 0xb6,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
         0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x07,
     };
-    i64 ss[64];
-    FOR (i,  0, 32) { ss[i] = (u64) half_ones[i]; }
-    FOR (i, 32, 64) { ss[i] = 0;                  }
+    i64 s[64];
+    FOR (i,  0, 32) { s[i] = (u64) half_ones[i]; }
+    FOR (i, 32, 64) { s[i] = 0;                  }
     FOR (i,  0, 32) {
         FOR (j, 0, 32) {
-            ss[i+j] += half_mod_L[i] * (u64) scalar[j];
+            s[i+j] += half_mod_L[i] * (u64) scalar[j];
         }
     }
-    u8 ss_scalar[32]; // for each bit: 1 means 1, 0 means -1
-    modL(ss_scalar, s);
-
-    // Result of method 1: s_scalar
-    // Result of method 2: ss_scalar
-
-    // Uncomment those print statements to print the scalar. in various forms
-    // Note that s_scalar and ss_scalar have the same value. The whole thing
-    // looks correct.
-
-    /* printf("0x"); FOR (i, 0, 32) printf("%02x", half_mod_L[31-i]); printf("\n"); */
-    /* printf("0x"); FOR (i, 0, 32) printf("%02x", ones      [31-i]); printf("\n"); */
-    /* printf("0x"); FOR (i, 0, 32) printf("%02x", scalar    [31-i]); printf("\n"); */
-    /* printf("0x"); FOR (i, 0, 32) printf("%02x", s_scalar  [31-i]); printf("\n"); */
-    /* printf("0x"); FOR (i, 0, 32) printf("%02x", ss_scalar [31-i]); printf("\n"); */
-    /* printf("\n"); */
+    u8 s_scalar[32]; // for each bit: 1 means 1, 0 means -1
+    modL(s_scalar, s);
 
     // Double and add ladder
     fe yp, ym, t2, a, b; // temporaries for addition
@@ -1908,17 +1876,6 @@ static void ge_scalarmult_base(ge *p, const u8 scalar[32])
         fe_1(ym);
         fe_0(t2);
 
-        // Experimental method (signed comb). The one that doesn't work.
-        // Yet I don't see any room for error there:
-        // - I'm using the same tables         as the method that works
-        // - I'm using the same look up method as the method that works
-        // - I'm using the same subtraction    as the method that works
-        // - The s_scalar is unproven, but still looks correct
-        // - I don't see how the teeth could be incorrect.
-        // - I don't see how my index could be incorrect.
-        // - I don't see how the add/sub choice could be incorrect.
-        // I have no idea what I'm missing.
-        /*
         i8 teeth = (scalar_bit(s_scalar, i      ) *  2 -  1)
             +      (scalar_bit(s_scalar, i +  51) *  4 -  2)
             +      (scalar_bit(s_scalar, i + 102) *  8 -  4)
@@ -1926,38 +1883,32 @@ static void ge_scalarmult_base(ge *p, const u8 scalar[32])
             +      (scalar_bit(s_scalar, i + 204) * 32 - 16);
         u8 index = (teeth > 0 ? teeth : -teeth) / 2;
 
-        fe_copy(yp, comb_Yp[index]);
-        fe_copy(ym, comb_Ym[index]);
-        fe_copy(t2, comb_T2[index]);
+        if (index & 1) { // pick from the odd table
+            fe_copy(yp, comb_Yp[index/2+8]);
+            fe_copy(ym, comb_Ym[index/2+8]);
+            fe_copy(t2, comb_T2[index/2+8]);
+        } else {         // pick from the even table
+            fe_copy(yp, comb_Yp_even[index/2+8]);
+            fe_copy(ym, comb_Ym_even[index/2+8]);
+            fe_copy(t2, comb_T2_even[index/2+8]);
+        }
 
         if (teeth > 0) { ge_madd(p, p, yp, ym, t2, a, b); }
         else           { ge_msub(p, p, yp, ym, t2, a, b); }
-        */
-        // End of the experimental method
-
-        // Control method. The one that works.
-        // Note that odd and even elements of the table are separated.
-        // This is to ensure we use the same odd table for the signed comb.
-        // Since this method works, an error in the tables is unlikely.
-        u8 teeth = scalar_bit(scalar, i)
-            |     (scalar_bit(scalar, i +  51) << 1)
-            |     (scalar_bit(scalar, i + 102) << 2)
-            |     (scalar_bit(scalar, i + 153) << 3)
-            |     (scalar_bit(scalar, i + 204) << 4);
-        u8  index = teeth / 2;
-        if (teeth & 1) { // pick from the odd table
-            fe_copy(yp, comb_Yp[index]);
-            fe_copy(ym, comb_Ym[index]);
-            fe_copy(t2, comb_T2[index]);
+
+        index ^= 0xF;
+        if (index & 1) { // pick from the odd table
+            fe_copy(yp, comb_Yp[index/2]);
+            fe_copy(ym, comb_Ym[index/2]);
+            fe_copy(t2, comb_T2[index/2]);
         } else {         // pick from the even table
-            fe_copy(yp, comb_Yp_even[index]);
-            fe_copy(ym, comb_Ym_even[index]);
-            fe_copy(t2, comb_T2_even[index]);
+            fe_copy(yp, comb_Yp_even[index/2]);
+            fe_copy(ym, comb_Ym_even[index/2]);
+            fe_copy(t2, comb_T2_even[index/2]);
         }
-        ge_msub(p, p, yp, ym, t2, a, b); // test subtraction
-        ge_madd(p, p, yp, ym, t2, a, b); // add to compensate
-        ge_madd(p, p, yp, ym, t2, a, b); // add for real
-        // End of the control method.
+
+        if (teeth > 0) { ge_msub(p, p, yp, ym, t2, a, b); }
+        else           { ge_madd(p, p, yp, ym, t2, a, b); }
     }
     WIPE_CTX(&dbl);
     WIPE_BUFFER(ym);  WIPE_BUFFER(yp);  WIPE_BUFFER(t2);
-- 
2.47.3