Clean up SABER
This commit is contained in:
parent
5f02a4e80c
commit
3307f05c49
@ -8,6 +8,7 @@
|
||||
#define N_SB (SABER_N >> 2)
|
||||
#define N_SB_RES (2*N_SB-1)
|
||||
|
||||
#define OVERFLOWING_MUL(X, Y) ((uint16_t)((uint32_t)(X) * (uint32_t)(Y)))
|
||||
|
||||
#define KARATSUBA_N 64
|
||||
static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) {
|
||||
@ -37,8 +38,12 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t
|
||||
acc5 = b_1[j]; //b0
|
||||
acc6 = b_1[j + KARATSUBA_N / 4]; //b1
|
||||
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] = result_final[i + j + 0 * KARATSUBA_N / 4] + acc1 * acc5;
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] = result_final[i + j + 2 * KARATSUBA_N / 4] + acc2 * acc6;
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc1, acc5);
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc2, acc6);
|
||||
|
||||
acc7 = acc5 + acc6; //b01
|
||||
acc8 = acc1 + acc2; //a01
|
||||
@ -47,26 +52,34 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t
|
||||
|
||||
acc7 = b_1[j + 2 * KARATSUBA_N / 4]; //b2
|
||||
acc8 = b_1[j + 3 * KARATSUBA_N / 4]; //b3
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] = result_final[i + j + 4 * KARATSUBA_N / 4] + acc7 * acc3;
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc7, acc3);
|
||||
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] = result_final[i + j + 6 * KARATSUBA_N / 4] + acc8 * acc4;
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc8, acc4);
|
||||
|
||||
acc9 = acc3 + acc4;
|
||||
acc10 = acc7 + acc8;
|
||||
d23[i + j] = d23[i + j] + acc9 * acc10;
|
||||
d23[i + j] = d23[i + j] + OVERFLOWING_MUL(acc9, acc10);
|
||||
//--------------------------------------------------------
|
||||
|
||||
acc5 = acc5 + acc7; //b02
|
||||
acc7 = acc1 + acc3; //a02
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] = result_d01[i + j + 0 * KARATSUBA_N / 4] + acc5 * acc7;
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] =
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc5, acc7);
|
||||
|
||||
acc6 = acc6 + acc8; //b13
|
||||
acc8 = acc2 + acc4;
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] = result_d01[i + j + 2 * KARATSUBA_N / 4] + acc6 * acc8;
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] =
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc6, acc8);
|
||||
|
||||
acc5 = acc5 + acc6;
|
||||
acc7 = acc7 + acc8;
|
||||
d0123[i + j] = d0123[i + j] + acc5 * acc7;
|
||||
d0123[i + j] = d0123[i + j] + OVERFLOWING_MUL(acc5, acc7);
|
||||
}
|
||||
}
|
||||
|
||||
@ -197,11 +210,11 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re
|
||||
r2 = r2 - r6;
|
||||
r2 = r2 - r0;
|
||||
r1 = r1 + 45 * r2;
|
||||
r4 = (((r4 - (r2 << 3)) * inv3) >> 3);
|
||||
r4 = (uint16_t)(((r4 - (r2 << 3)) * (uint32_t)inv3) >> 3);
|
||||
r5 = r5 + r1;
|
||||
r1 = (((r1 + (r3 << 4)) * inv9) >> 1);
|
||||
r1 = (uint16_t)(((r1 + (r3 << 4)) * (uint32_t)inv9) >> 1);
|
||||
r3 = -(r3 + r1);
|
||||
r5 = (((30 * r1 - r5) * inv15) >> 2);
|
||||
r5 = (uint16_t)(((30 * r1 - r5) * (uint32_t)inv15) >> 2);
|
||||
r2 = r2 - r4;
|
||||
r1 = r1 - r5;
|
||||
|
||||
@ -215,9 +228,7 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re
|
||||
}
|
||||
}
|
||||
|
||||
void PQCLEAN_FIRESABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n)
|
||||
|
||||
{
|
||||
void PQCLEAN_FIRESABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) {
|
||||
uint32_t i;
|
||||
// normal multiplication
|
||||
uint16_t c[512];
|
||||
@ -232,6 +243,4 @@ void PQCLEAN_FIRESABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, ui
|
||||
for (i = n; i < 2 * n; i++) {
|
||||
res[i - n] = (c[i - n] - c[i]) & (p - 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#define N_SB (SABER_N >> 2)
|
||||
#define N_SB_RES (2*N_SB-1)
|
||||
|
||||
#define OVERFLOWING_MUL(X, Y) ((uint16_t)((uint32_t)(X) * (uint32_t)(Y)))
|
||||
|
||||
#define KARATSUBA_N 64
|
||||
static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) {
|
||||
@ -37,8 +38,12 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t
|
||||
acc5 = b_1[j]; //b0
|
||||
acc6 = b_1[j + KARATSUBA_N / 4]; //b1
|
||||
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] = result_final[i + j + 0 * KARATSUBA_N / 4] + acc1 * acc5;
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] = result_final[i + j + 2 * KARATSUBA_N / 4] + acc2 * acc6;
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc1, acc5);
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc2, acc6);
|
||||
|
||||
acc7 = acc5 + acc6; //b01
|
||||
acc8 = acc1 + acc2; //a01
|
||||
@ -47,26 +52,34 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t
|
||||
|
||||
acc7 = b_1[j + 2 * KARATSUBA_N / 4]; //b2
|
||||
acc8 = b_1[j + 3 * KARATSUBA_N / 4]; //b3
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] = result_final[i + j + 4 * KARATSUBA_N / 4] + acc7 * acc3;
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc7, acc3);
|
||||
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] = result_final[i + j + 6 * KARATSUBA_N / 4] + acc8 * acc4;
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc8, acc4);
|
||||
|
||||
acc9 = acc3 + acc4;
|
||||
acc10 = acc7 + acc8;
|
||||
d23[i + j] = d23[i + j] + acc9 * acc10;
|
||||
d23[i + j] = d23[i + j] + OVERFLOWING_MUL(acc9, acc10);
|
||||
//--------------------------------------------------------
|
||||
|
||||
acc5 = acc5 + acc7; //b02
|
||||
acc7 = acc1 + acc3; //a02
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] = result_d01[i + j + 0 * KARATSUBA_N / 4] + acc5 * acc7;
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] =
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc5, acc7);
|
||||
|
||||
acc6 = acc6 + acc8; //b13
|
||||
acc8 = acc2 + acc4;
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] = result_d01[i + j + 2 * KARATSUBA_N / 4] + acc6 * acc8;
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] =
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc6, acc8);
|
||||
|
||||
acc5 = acc5 + acc6;
|
||||
acc7 = acc7 + acc8;
|
||||
d0123[i + j] = d0123[i + j] + acc5 * acc7;
|
||||
d0123[i + j] = d0123[i + j] + OVERFLOWING_MUL(acc5, acc7);
|
||||
}
|
||||
}
|
||||
|
||||
@ -197,11 +210,11 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re
|
||||
r2 = r2 - r6;
|
||||
r2 = r2 - r0;
|
||||
r1 = r1 + 45 * r2;
|
||||
r4 = (((r4 - (r2 << 3)) * inv3) >> 3);
|
||||
r4 = (uint16_t)(((r4 - (r2 << 3)) * (uint32_t)inv3) >> 3);
|
||||
r5 = r5 + r1;
|
||||
r1 = (((r1 + (r3 << 4)) * inv9) >> 1);
|
||||
r1 = (uint16_t)(((r1 + (r3 << 4)) * (uint32_t)inv9) >> 1);
|
||||
r3 = -(r3 + r1);
|
||||
r5 = (((30 * r1 - r5) * inv15) >> 2);
|
||||
r5 = (uint16_t)(((30 * r1 - r5) * (uint32_t)inv15) >> 2);
|
||||
r2 = r2 - r4;
|
||||
r1 = r1 - r5;
|
||||
|
||||
@ -215,9 +228,7 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re
|
||||
}
|
||||
}
|
||||
|
||||
void PQCLEAN_LIGHTSABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n)
|
||||
|
||||
{
|
||||
void PQCLEAN_LIGHTSABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) {
|
||||
uint32_t i;
|
||||
// normal multiplication
|
||||
uint16_t c[512];
|
||||
@ -232,6 +243,4 @@ void PQCLEAN_LIGHTSABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, u
|
||||
for (i = n; i < 2 * n; i++) {
|
||||
res[i - n] = (c[i - n] - c[i]) & (p - 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#define N_SB (SABER_N >> 2)
|
||||
#define N_SB_RES (2*N_SB-1)
|
||||
|
||||
#define OVERFLOWING_MUL(X, Y) ((uint16_t)((uint32_t)(X) * (uint32_t)(Y)))
|
||||
|
||||
#define KARATSUBA_N 64
|
||||
static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) {
|
||||
@ -37,8 +38,12 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t
|
||||
acc5 = b_1[j]; //b0
|
||||
acc6 = b_1[j + KARATSUBA_N / 4]; //b1
|
||||
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] = result_final[i + j + 0 * KARATSUBA_N / 4] + acc1 * acc5;
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] = result_final[i + j + 2 * KARATSUBA_N / 4] + acc2 * acc6;
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 0 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc1, acc5);
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 2 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc2, acc6);
|
||||
|
||||
acc7 = acc5 + acc6; //b01
|
||||
acc8 = acc1 + acc2; //a01
|
||||
@ -47,26 +52,34 @@ static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t
|
||||
|
||||
acc7 = b_1[j + 2 * KARATSUBA_N / 4]; //b2
|
||||
acc8 = b_1[j + 3 * KARATSUBA_N / 4]; //b3
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] = result_final[i + j + 4 * KARATSUBA_N / 4] + acc7 * acc3;
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 4 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc7, acc3);
|
||||
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] = result_final[i + j + 6 * KARATSUBA_N / 4] + acc8 * acc4;
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] =
|
||||
result_final[i + j + 6 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc8, acc4);
|
||||
|
||||
acc9 = acc3 + acc4;
|
||||
acc10 = acc7 + acc8;
|
||||
d23[i + j] = d23[i + j] + acc9 * acc10;
|
||||
d23[i + j] = d23[i + j] + OVERFLOWING_MUL(acc9, acc10);
|
||||
//--------------------------------------------------------
|
||||
|
||||
acc5 = acc5 + acc7; //b02
|
||||
acc7 = acc1 + acc3; //a02
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] = result_d01[i + j + 0 * KARATSUBA_N / 4] + acc5 * acc7;
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] =
|
||||
result_d01[i + j + 0 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc5, acc7);
|
||||
|
||||
acc6 = acc6 + acc8; //b13
|
||||
acc8 = acc2 + acc4;
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] = result_d01[i + j + 2 * KARATSUBA_N / 4] + acc6 * acc8;
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] =
|
||||
result_d01[i + j + 2 * KARATSUBA_N / 4] +
|
||||
OVERFLOWING_MUL(acc6, acc8);
|
||||
|
||||
acc5 = acc5 + acc6;
|
||||
acc7 = acc7 + acc8;
|
||||
d0123[i + j] = d0123[i + j] + acc5 * acc7;
|
||||
d0123[i + j] = d0123[i + j] + OVERFLOWING_MUL(acc5, acc7);
|
||||
}
|
||||
}
|
||||
|
||||
@ -197,11 +210,11 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re
|
||||
r2 = r2 - r6;
|
||||
r2 = r2 - r0;
|
||||
r1 = r1 + 45 * r2;
|
||||
r4 = (((r4 - (r2 << 3)) * inv3) >> 3);
|
||||
r4 = (uint16_t)(((r4 - (r2 << 3)) * (uint32_t)inv3) >> 3);
|
||||
r5 = r5 + r1;
|
||||
r1 = (((r1 + (r3 << 4)) * inv9) >> 1);
|
||||
r1 = (uint16_t)(((r1 + (r3 << 4)) * (uint32_t)inv9) >> 1);
|
||||
r3 = -(r3 + r1);
|
||||
r5 = (((30 * r1 - r5) * inv15) >> 2);
|
||||
r5 = (uint16_t)(((30 * r1 - r5) * (uint32_t)inv15) >> 2);
|
||||
r2 = r2 - r4;
|
||||
r1 = r1 - r5;
|
||||
|
||||
@ -215,9 +228,7 @@ static void toom_cook_4way (const uint16_t *a1, const uint16_t *b1, uint16_t *re
|
||||
}
|
||||
}
|
||||
|
||||
void PQCLEAN_SABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n)
|
||||
|
||||
{
|
||||
void PQCLEAN_SABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16_t p, uint32_t n) {
|
||||
uint32_t i;
|
||||
// normal multiplication
|
||||
uint16_t c[512];
|
||||
@ -232,6 +243,4 @@ void PQCLEAN_SABER_CLEAN_pol_mul(uint16_t *a, uint16_t *b, uint16_t *res, uint16
|
||||
for (i = n; i < 2 * n; i++) {
|
||||
res[i - n] = (c[i - n] - c[i]) & (p - 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
Loading…
Reference in new issue
Block a user