|
@@ -38,66 +38,126 @@ int32_t PQCLEAN_DILITHIUM2_CLEAN_power2round(int32_t *a0, int32_t a) { |
|
|
* |
|
|
* |
|
|
* Returns a1. |
|
|
* Returns a1. |
|
|
**************************************************/ |
|
|
**************************************************/ |
|
|
int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a) { |
|
|
|
|
|
int32_t a1 = 0; |
|
|
|
|
|
uint64_t r; |
|
|
|
|
|
|
|
|
|
|
|
int32_t r0, r1; |
|
|
|
|
|
|
|
|
int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose_ORG(int32_t *a0, int32_t a) { |
|
|
|
|
|
/* TODO: |
|
|
|
|
|
a % Q is skipped, as it seems a<Q always. In case this |
|
|
|
|
|
needs to be done, then we can use a fact that Q is a |
|
|
|
|
|
Generalized Mersenne Prime, so modular redc is fast
|
|
|
|
|
(see work by Jerome Solinas and Crandall's '92 algorithm).
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
assert(a>0); assert(a<Q); |
|
|
assert(a>0); assert(a<Q); |
|
|
|
|
|
|
|
|
// mod ALPHA |
|
|
|
|
|
static const uint32_t u = 360800; |
|
|
|
|
|
r = ((uint64_t)a)*u; |
|
|
|
|
|
r >>= 36; |
|
|
|
|
|
r *= 2 * GAMMA2; |
|
|
|
|
|
r = a - r; |
|
|
|
|
|
|
|
|
|
|
|
if (r>(2*GAMMA2)) { |
|
|
|
|
|
r -= 2*GAMMA2; |
|
|
|
|
|
|
|
|
// Use Barrett reduction to calculate r0 = r % A. The |
|
|
|
|
|
// code calculates: |
|
|
|
|
|
// floor(a/A) = floor( (a * R) / 2^M) |
|
|
|
|
|
// where, |
|
|
|
|
|
// M is so that 2^M>= A^2 |
|
|
|
|
|
//  R = floor(2^M / A)
|
|
|
|
|
static const uint32_t M = 36; |
|
|
|
|
|
// Precomputed reciprocal R = floor(2^36 / 190464)
|
|
|
|
|
static const uint32_t R = 360800; |
|
|
|
|
|
// As per spec ALPHA, A = 2*GAMMA2 = (Q-1)/88 * 2 (Dilithium2) |
|
|
|
|
|
static const uint32_t A = 2*GAMMA2; |
|
|
|
|
|
|
|
|
|
|
|
// a0 |
|
|
|
|
|
int32_t r; |
|
|
|
|
|
int32_t v,w,z; |
|
|
|
|
|
// Barrett reduction: |
|
|
|
|
|
// a0' = a mod A = a - A*floor((a*r) / 2^M) |
|
|
|
|
|
r = (int32_t)((((uint64_t)a)*R) >> M); |
|
|
|
|
|
r = a - r*A; |
|
|
|
|
|
|
|
|
|
|
|
v = ((A-r)>>31) & 1; |
|
|
|
|
|
w = ((GAMMA2 - r)>>31) & 1; |
|
|
|
|
|
z = (((A + GAMMA2) -r) >> 31) & 1; |
|
|
|
|
|
// printf("%d %d %d\n", v,w,z); |
|
|
|
|
|
*a0 = r - (((!z)&(v|w))*A) - (z)*2*A; |
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
|
|
|
// REDC |
|
|
|
|
|
if (r>(int32_t)A) { |
|
|
|
|
|
r -= A; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
r1 = ((int32_t)r)*2*GAMMA2; |
|
|
|
|
|
|
|
|
|
|
|
// centrize |
|
|
// centrize |
|
|
if (r > GAMMA2) { |
|
|
|
|
|
*a0 = (int32_t)r - 2*GAMMA2; |
|
|
|
|
|
|
|
|
if (r > (int32_t)GAMMA2) { |
|
|
|
|
|
*a0 = (int32_t)r - A; |
|
|
} else { |
|
|
} else { |
|
|
*a0 = r; |
|
|
*a0 = r; |
|
|
} |
|
|
} |
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
// a1 |
|
|
|
|
|
uint64_t a2 = a - *a0; |
|
|
|
|
|
|
|
|
// OLD |
|
|
|
|
|
a1 = (a + 127) >> 7; |
|
|
|
|
|
a1 = (a1 * 11275 + (1 << 23)) >> 24; |
|
|
|
|
|
a1 ^= ((43 - a1) >> 31) & a1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
v = ((int32_t)a2 - Q + 1); |
|
|
|
|
|
//a2 = (!v); |
|
|
|
|
|
*a0 -= !v; |
|
|
|
|
|
a2 = (!!v)*a2; |
|
|
// CASE: r-r0 = q-1 => r1=0, r0 = r0-1 |
|
|
// CASE: r-r0 = q-1 => r1=0, r0 = r0-1 |
|
|
uint64_t a2 = (uint64_t)a - *a0; |
|
|
|
|
|
|
|
|
#if 0 |
|
|
if (a2 == (Q-1)) { |
|
|
if (a2 == (Q-1)) { |
|
|
a2 = 0; |
|
|
a2 = 0; |
|
|
*a0--; |
|
|
|
|
|
|
|
|
*a0 = *a0-1; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// divide (r-r0)/alpha |
|
|
|
|
|
// int32_t a2 = ((uint64_t)a-*a0)/(2*GAMMA2); |
|
|
|
|
|
if ( (a2 >= (2*GAMMA2))) { |
|
|
|
|
|
a2 = (a2*u) >> 36; |
|
|
|
|
|
// a2 is divisible by ALPHA=(2*GAMMA2) and hence |
|
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
// divide (r-r0)/A |
|
|
|
|
|
// int32_t a2 = ((uint64_t)a-*a0)/(A); |
|
|
|
|
|
v = ((int32_t)a2-A) >> 31; |
|
|
|
|
|
a2 = (!v)*(((a2*R) >> M) + 1) + v*a2; |
|
|
|
|
|
/* |
|
|
|
|
|
if ( (a2 >= (A))) { |
|
|
|
|
|
a2 = (a2*R) >> M; |
|
|
|
|
|
// a2 is divisible by ALPHA=(A) and hence |
|
|
// it will always be off by one. |
|
|
// it will always be off by one. |
|
|
a2++; |
|
|
a2++; |
|
|
} |
|
|
} |
|
|
|
|
|
*/ |
|
|
|
|
|
return a2; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//if (!a1) a2 = a1; |
|
|
|
|
|
|
|
|
|
|
|
//*a0 = a - a1 * 2 * GAMMA2; |
|
|
|
|
|
//*a0 -= (((Q - 1) / 2 - *a0) >> 31) & Q; |
|
|
|
|
|
if (a1 != (int32_t)a2) |
|
|
|
|
|
printf("OZAPTF: (A1=%d, A2=%d, A=%d R=%d)\n", |
|
|
|
|
|
a1, (int32_t)a2, a, (a-(*a0))); |
|
|
|
|
|
// printf("OZAPTF: %d %d %d\n", a, *a0, (a-*a0)); |
|
|
|
|
|
|
|
|
int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a) { |
|
|
|
|
|
/* TODO: |
|
|
|
|
|
a % Q is skipped, as it seems a<Q always. In case this |
|
|
|
|
|
needs to be done, then we can use a fact that Q is a |
|
|
|
|
|
Generalized Marsenne Prime, so modular redc is fast |
|
|
|
|
|
(see work by Jerome Solina and Crandall '92 algo). |
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
// Use Barrett reduction to calculate r0 = r % A. The |
|
|
|
|
|
// code calculates: |
|
|
|
|
|
// floor(a/A) = floor( (a * R) / 2^M) |
|
|
|
|
|
// where, |
|
|
|
|
|
// M is so that 2^M>= A^2 |
|
|
|
|
|
// r = floor(2^M / A) |
|
|
|
|
|
static const uint32_t M = 36; |
|
|
|
|
|
// Precomputed reciprocal r = floor((2^36) / 190464 |
|
|
|
|
|
static const uint32_t R = 360800; |
|
|
|
|
|
// As per spec ALPHA, A = 2*GAMMA2 = (Q-1)/88 * 2 (Dilithium2) |
|
|
|
|
|
static const uint32_t A = 2*GAMMA2; |
|
|
|
|
|
|
|
|
|
|
|
// a0 |
|
|
|
|
|
int32_t r; |
|
|
|
|
|
int32_t v,w,z; |
|
|
|
|
|
// Barrett reduction: |
|
|
|
|
|
// a0' = a mod A = a - A*floor((a*r) / 2^M) |
|
|
|
|
|
r = (int32_t)((((uint64_t)a)*R) >> M); |
|
|
|
|
|
r = a - r*A; |
|
|
|
|
|
|
|
|
|
|
|
v = ((A-r)>>31) & 1; |
|
|
|
|
|
w = ((GAMMA2 - r)>>31) & 1; |
|
|
|
|
|
z = (((A + GAMMA2) -r) >> 31) & 1; |
|
|
|
|
|
*a0 = r - (((!z)&(v|w))*A) - (z)*2*A; |
|
|
|
|
|
|
|
|
|
|
|
// a1 |
|
|
|
|
|
uint64_t a2 = a - *a0; |
|
|
|
|
|
v = ((int32_t)a2 - Q + 1); |
|
|
|
|
|
*a0 -= !v; |
|
|
|
|
|
a2 = (!!v)*a2; |
|
|
|
|
|
v = ((int32_t)a2-A) >> 31; |
|
|
|
|
|
a2 = (!v)*(((a2*R) >> M) + 1) + v*a2; |
|
|
return a2; |
|
|
return a2; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|