From ef38541c6e7c8cf88a68c46d65aa5e141980ccf8 Mon Sep 17 00:00:00 2001 From: Thom Wiggers Date: Fri, 13 Mar 2020 13:00:57 -0400 Subject: [PATCH] Clean up SABER --- crypto_kem/firesaber/clean/poly_mul.c | 41 ++++++++++++++++---------- crypto_kem/lightsaber/clean/poly_mul.c | 41 ++++++++++++++++---------- crypto_kem/saber/clean/poly_mul.c | 41 ++++++++++++++++---------- 3 files changed, 75 insertions(+), 48 deletions(-) diff --git a/crypto_kem/firesaber/clean/poly_mul.c b/crypto_kem/firesaber/clean/poly_mul.c index c71d917c..e6ba9364 100644 --- a/crypto_kem/firesaber/clean/poly_mul.c +++ b/crypto_kem/firesaber/clean/poly_mul.c @@ -8,6 +8,7 @@ #define N_SB (SABER_N >> 2) #define N_SB_RES (2*N_SB-1) +#define OVERFLOWING_MUL(X, Y) ((uint16_t)((uint32_t)(X) * (uint32_t)(Y))) #define KARATSUBA_N 64 static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) { @@ -37,8 +38,12 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t acc5 = b_1[j]; //b0 acc6 = b_1[j + KARATSUBA_N / 4]; //b1 - result_final[i + j + 0 * KARATSUBA_N / 4] = result_final[i + j + 0 * KARATSUBA_N / 4] + acc1 * acc5; - result_final[i + j + 2 * KARATSUBA_N / 4] = result_final[i + j + 2 * KARATSUBA_N / 4] + acc2 * acc6; + result_final[i + j + 0 * KARATSUBA_N / 4] = + result_final[i + j + 0 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc1, acc5); + result_final[i + j + 2 * KARATSUBA_N / 4] = + result_final[i + j + 2 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc2, acc6); acc7 = acc5 + acc6; //b01 acc8 = acc1 + acc2; //a01 @@ -47,26 +52,34 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t acc7 = b_1[j + 2 * KARATSUBA_N / 4]; //b2 acc8 = b_1[j + 3 * KARATSUBA_N / 4]; //b3 - result_final[i + j + 4 * KARATSUBA_N / 4] = result_final[i + j + 4 * KARATSUBA_N / 4] + acc7 * acc3; + result_final[i + j + 4 * KARATSUBA_N / 4] = + result_final[i + j + 4 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc7, acc3); - result_final[i + j + 6 * KARATSUBA_N / 4] = result_final[i + j + 6 * KARATSUBA_N / 4] + acc8 * acc4; + result_final[i + j + 6 * KARATSUBA_N / 4] = + result_final[i + j + 6 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc8, acc4); acc9 = acc3 + acc4; acc10 = acc7 + acc8; - d23[i + j] = d23[i + j] + acc9 * acc10; + d23[i + j] = d23[i + j] + OVERFLOWING_MUL(acc9, acc10); //-------------------------------------------------------- acc5 = acc5 + acc7; //b02 acc7 = acc1 + acc3; //a02 - result_d01[i + j + 0 * KARATSUBA_N / 4] = result_d01[i + j + 0 * KARATSUBA_N / 4] + acc5 * acc7; + result_d01[i + j + 0 * KARATSUBA_N / 4] = + result_d01[i + j + 0 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc5, acc7); acc6 = acc6 + acc8; //b13 acc8 = acc2 + acc4; - result_d01[i + j + 2 * KARATSUBA_N / 4] = result_d01[i + j + 2 * KARATSUBA_N / 4] + acc6 * acc8; + result_d01[i + j + 2 * KARATSUBA_N / 4] = + result_d01[i + j + 2 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc6, acc8); acc5 = acc5 + acc6; acc7 = acc7 + acc8; - d0123[i + j] = d0123[i + j] + acc5 * acc7; + d0123[i + j] = d0123[i + j] + OVERFLOWING_MUL(acc5, acc7); } } @@ -197,11 +210,11 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re r2 = r2 - r6; r2 = r2 - r0; r1 = r1 + 45 * r2; - r4 = (((r4 - (r2 << 3)) * inv3) >> 3); + r4 = (uint16_t)(((r4 - (r2 << 3)) * (uint32_t)inv3) >> 3); r5 = r5 + r1; - r1 = (((r1 + (r3 << 4)) * inv9) >> 1); + r1 = (uint16_t)(((r1 + (r3 << 4)) * (uint32_t)inv9) >> 1); r3 = -(r3 + r1); - r5 = (((30 * r1 - r5) * inv15) >> 2); + r5 = (uint16_t)(((30 * r1 - r5) * (uint32_t)inv15) >> 2); r2 = r2 - r4; r1 = r1 - r5; @@ -215,9 +228,7 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re } } -void PQCLEAN_FIRESABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) - -{ +void PQCLEAN_FIRESABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) { uint32_t i; // normal multiplication uint16_t c[512]; @@ -232,6 +243,4 @@ void PQCLEAN_FIRESABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, ui for (i = n; i < 2 * n; i++) { res[i - n] = (c[i - n] - c[i]) & (p - 1); } - - } diff --git a/crypto_kem/lightsaber/clean/poly_mul.c b/crypto_kem/lightsaber/clean/poly_mul.c index 5b029a78..d1ffc530 100644 --- a/crypto_kem/lightsaber/clean/poly_mul.c +++ b/crypto_kem/lightsaber/clean/poly_mul.c @@ -8,6 +8,7 @@ #define N_SB (SABER_N >> 2) #define N_SB_RES (2*N_SB-1) +#define OVERFLOWING_MUL(X, Y) ((uint16_t)((uint32_t)(X) * (uint32_t)(Y))) #define KARATSUBA_N 64 static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) { @@ -37,8 +38,12 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t acc5 = b_1[j]; //b0 acc6 = b_1[j + KARATSUBA_N / 4]; //b1 - result_final[i + j + 0 * KARATSUBA_N / 4] = result_final[i + j + 0 * KARATSUBA_N / 4] + acc1 * acc5; - result_final[i + j + 2 * KARATSUBA_N / 4] = result_final[i + j + 2 * KARATSUBA_N / 4] + acc2 * acc6; + result_final[i + j + 0 * KARATSUBA_N / 4] = + result_final[i + j + 0 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc1, acc5); + result_final[i + j + 2 * KARATSUBA_N / 4] = + result_final[i + j + 2 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc2, acc6); acc7 = acc5 + acc6; //b01 acc8 = acc1 + acc2; //a01 @@ -47,26 +52,34 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t acc7 = b_1[j + 2 * KARATSUBA_N / 4]; //b2 acc8 = b_1[j + 3 * KARATSUBA_N / 4]; //b3 - result_final[i + j + 4 * KARATSUBA_N / 4] = result_final[i + j + 4 * KARATSUBA_N / 4] + acc7 * acc3; + result_final[i + j + 4 * KARATSUBA_N / 4] = + result_final[i + j + 4 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc7, acc3); - result_final[i + j + 6 * KARATSUBA_N / 4] = result_final[i + j + 6 * KARATSUBA_N / 4] + acc8 * acc4; + result_final[i + j + 6 * KARATSUBA_N / 4] = + result_final[i + j + 6 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc8, acc4); acc9 = acc3 + acc4; acc10 = acc7 + acc8; - d23[i + j] = d23[i + j] + acc9 * acc10; + d23[i + j] = d23[i + j] + OVERFLOWING_MUL(acc9, acc10); //-------------------------------------------------------- acc5 = acc5 + acc7; //b02 acc7 = acc1 + acc3; //a02 - result_d01[i + j + 0 * KARATSUBA_N / 4] = result_d01[i + j + 0 * KARATSUBA_N / 4] + acc5 * acc7; + result_d01[i + j + 0 * KARATSUBA_N / 4] = + result_d01[i + j + 0 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc5, acc7); acc6 = acc6 + acc8; //b13 acc8 = acc2 + acc4; - result_d01[i + j + 2 * KARATSUBA_N / 4] = result_d01[i + j + 2 * KARATSUBA_N / 4] + acc6 * acc8; + result_d01[i + j + 2 * KARATSUBA_N / 4] = + result_d01[i + j + 2 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc6, acc8); acc5 = acc5 + acc6; acc7 = acc7 + acc8; - d0123[i + j] = d0123[i + j] + acc5 * acc7; + d0123[i + j] = d0123[i + j] + OVERFLOWING_MUL(acc5, acc7); } } @@ -197,11 +210,11 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re r2 = r2 - r6; r2 = r2 - r0; r1 = r1 + 45 * r2; - r4 = (((r4 - (r2 << 3)) * inv3) >> 3); + r4 = (uint16_t)(((r4 - (r2 << 3)) * (uint32_t)inv3) >> 3); r5 = r5 + r1; - r1 = (((r1 + (r3 << 4)) * inv9) >> 1); + r1 = (uint16_t)(((r1 + (r3 << 4)) * (uint32_t)inv9) >> 1); r3 = -(r3 + r1); - r5 = (((30 * r1 - r5) * inv15) >> 2); + r5 = (uint16_t)(((30 * r1 - r5) * (uint32_t)inv15) >> 2); r2 = r2 - r4; r1 = r1 - r5; @@ -215,9 +228,7 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re } } -void PQCLEAN_LIGHTSABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) - -{ +void PQCLEAN_LIGHTSABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) { uint32_t i; // normal multiplication uint16_t c[512]; @@ -232,6 +243,4 @@ void PQCLEAN_LIGHTSABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, u for (i = n; i < 2 * n; i++) { res[i - n] = (c[i - n] - c[i]) & (p - 1); } - - } diff --git a/crypto_kem/saber/clean/poly_mul.c b/crypto_kem/saber/clean/poly_mul.c index eb0f38d5..854cddd4 100644 --- a/crypto_kem/saber/clean/poly_mul.c +++ b/crypto_kem/saber/clean/poly_mul.c @@ -8,6 +8,7 @@ #define N_SB (SABER_N >> 2) #define N_SB_RES (2*N_SB-1) +#define OVERFLOWING_MUL(X, Y) ((uint16_t)((uint32_t)(X) * (uint32_t)(Y))) #define KARATSUBA_N 64 static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) { @@ -37,8 +38,12 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t acc5 = b_1[j]; //b0 acc6 = b_1[j + KARATSUBA_N / 4]; //b1 - result_final[i + j + 0 * KARATSUBA_N / 4] = result_final[i + j + 0 * KARATSUBA_N / 4] + acc1 * acc5; - result_final[i + j + 2 * KARATSUBA_N / 4] = result_final[i + j + 2 * KARATSUBA_N / 4] + acc2 * acc6; + result_final[i + j + 0 * KARATSUBA_N / 4] = + result_final[i + j + 0 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc1, acc5); + result_final[i + j + 2 * KARATSUBA_N / 4] = + result_final[i + j + 2 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc2, acc6); acc7 = acc5 + acc6; //b01 acc8 = acc1 + acc2; //a01 @@ -47,26 +52,34 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t acc7 = b_1[j + 2 * KARATSUBA_N / 4]; //b2 acc8 = b_1[j + 3 * KARATSUBA_N / 4]; //b3 - result_final[i + j + 4 * KARATSUBA_N / 4] = result_final[i + j + 4 * KARATSUBA_N / 4] + acc7 * acc3; + result_final[i + j + 4 * KARATSUBA_N / 4] = + result_final[i + j + 4 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc7, acc3); - result_final[i + j + 6 * KARATSUBA_N / 4] = result_final[i + j + 6 * KARATSUBA_N / 4] + acc8 * acc4; + result_final[i + j + 6 * KARATSUBA_N / 4] = + result_final[i + j + 6 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc8, acc4); acc9 = acc3 + acc4; acc10 = acc7 + acc8; - d23[i + j] = d23[i + j] + acc9 * acc10; + d23[i + j] = d23[i + j] + OVERFLOWING_MUL(acc9, acc10); //-------------------------------------------------------- acc5 = acc5 + acc7; //b02 acc7 = acc1 + acc3; //a02 - result_d01[i + j + 0 * KARATSUBA_N / 4] = result_d01[i + j + 0 * KARATSUBA_N / 4] + acc5 * acc7; + result_d01[i + j + 0 * KARATSUBA_N / 4] = + result_d01[i + j + 0 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc5, acc7); acc6 = acc6 + acc8; //b13 acc8 = acc2 + acc4; - result_d01[i + j + 2 * KARATSUBA_N / 4] = result_d01[i + j + 2 * KARATSUBA_N / 4] + acc6 * acc8; + result_d01[i + j + 2 * KARATSUBA_N / 4] = + result_d01[i + j + 2 * KARATSUBA_N / 4] + + OVERFLOWING_MUL(acc6, acc8); acc5 = acc5 + acc6; acc7 = acc7 + acc8; - d0123[i + j] = d0123[i + j] + acc5 * acc7; + d0123[i + j] = d0123[i + j] + OVERFLOWING_MUL(acc5, acc7); } } @@ -197,11 +210,11 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re r2 = r2 - r6; r2 = r2 - r0; r1 = r1 + 45 * r2; - r4 = (((r4 - (r2 << 3)) * inv3) >> 3); + r4 = (uint16_t)(((r4 - (r2 << 3)) * (uint32_t)inv3) >> 3); r5 = r5 + r1; - r1 = (((r1 + (r3 << 4)) * inv9) >> 1); + r1 = (uint16_t)(((r1 + (r3 << 4)) * (uint32_t)inv9) >> 1); r3 = -(r3 + r1); - r5 = (((30 * r1 - r5) * inv15) >> 2); + r5 = (uint16_t)(((30 * r1 - r5) * (uint32_t)inv15) >> 2); r2 = r2 - r4; r1 = r1 - r5; @@ -215,9 +228,7 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re } } -void PQCLEAN_SABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) - -{ +void PQCLEAN_SABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) { uint32_t i; // normal multiplication uint16_t c[512]; @@ -232,6 +243,4 @@ void PQCLEAN_SABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16 for (i = n; i < 2 * n; i++) { res[i - n] = (c[i - n] - c[i]) & (p - 1); } - - }