Barrett-based reduction

This commit is contained in:
Henry Case 2021-03-15 09:09:48 +00:00
부모 85148087ad
커밋 54d54ce47b
2개의 변경된 파일104개의 추가작업 그리고 36개의 파일을 삭제

파일 보기

@ -555,6 +555,14 @@ target_link_libraries(
pqclean_dilithium5_clean
)
# Install the pqclean and pqclean_s targets into lib/ and the public headers
# into include/pqclean. Installed libraries are writable by the owner only:
# group- or world-writable binaries would let any local user replace them.
install(TARGETS pqclean pqclean_s
  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib)
install(FILES
  ${QRS_PUBLIC_INC}
  DESTINATION include/pqclean)
# TODO: this requires changes to testvectors.c
# add_executable(
# test

파일 보기

@ -38,66 +38,126 @@ int32_t PQCLEAN_DILITHIUM2_CLEAN_power2round(int32_t *a0, int32_t a) {
*
* Returns a1.
**************************************************/
int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a) {
int32_t a1 = 0;
uint64_t r;
int32_t r0, r1;
int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose_ORG(int32_t *a0, int32_t a) {
/* TODO:
a % Q is skipped, as it seems a<Q always. In case this
needs to be done, then we can use a fact that Q is a
Generalized Mersenne Prime, so modular redc is fast
(see work by Jerome Solinas and Crandall '92 algo).
*/
assert(a>0); assert(a<Q);
// mod ALPHA
static const uint32_t u = 360800;
r = ((uint64_t)a)*u;
r >>= 36;
r *= 2 * GAMMA2;
r = a - r;
// Use Barrett reduction to calculate r0 = r % A. The
// code calculates:
// floor(a/A) = floor( (a * R) / 2^M)
// where,
// M is so that 2^M>= A^2
// r = floor(2^M / A)
static const uint32_t M = 36;
// Precomputed reciprocal R = floor(2^36 / 190464)
static const uint32_t R = 360800;
// As per spec ALPHA, A = 2*GAMMA2 = (Q-1)/88 * 2 (Dilithium2)
static const uint32_t A = 2*GAMMA2;
if (r>(2*GAMMA2)) {
r -= 2*GAMMA2;
// a0
int32_t r;
int32_t v,w,z;
// Barrett reduction:
// a0' = a mod A = a - A*floor((a*r) / 2^M)
r = (int32_t)((((uint64_t)a)*R) >> M);
r = a - r*A;
v = ((A-r)>>31) & 1;
w = ((GAMMA2 - r)>>31) & 1;
z = (((A + GAMMA2) -r) >> 31) & 1;
// printf("%d %d %d\n", v,w,z);
*a0 = r - (((!z)&(v|w))*A) - (z)*2*A;
/*
// REDC
if (r>(int32_t)A) {
r -= A;
}
r1 = ((int32_t)r)*2*GAMMA2;
// centrize
if (r > GAMMA2) {
*a0 = (int32_t)r - 2*GAMMA2;
if (r > (int32_t)GAMMA2) {
*a0 = (int32_t)r - A;
} else {
*a0 = r;
}
*/
// a1
uint64_t a2 = a - *a0;
// OLD
a1 = (a + 127) >> 7;
a1 = (a1 * 11275 + (1 << 23)) >> 24;
a1 ^= ((43 - a1) >> 31) & a1;
v = ((int32_t)a2 - Q + 1);
//a2 = (!v);
*a0 -= !v;
a2 = (!!v)*a2;
// CASE: r-r0 = q-1 => r1=0, r0 = r0-1
uint64_t a2 = (uint64_t)a - *a0;
#if 0
if (a2 == (Q-1)) {
a2 = 0;
*a0--;
*a0 = *a0-1;
}
#endif
// divide (r-r0)/alpha
// int32_t a2 = ((uint64_t)a-*a0)/(2*GAMMA2);
if ( (a2 >= (2*GAMMA2))) {
a2 = (a2*u) >> 36;
// a2 is divisible by ALPHA=(2*GAMMA2) and hence
// divide (r-r0)/A
// int32_t a2 = ((uint64_t)a-*a0)/(A);
v = ((int32_t)a2-A) >> 31;
a2 = (!v)*(((a2*R) >> M) + 1) + v*a2;
/*
if ( (a2 >= (A))) {
a2 = (a2*R) >> M;
// a2 is divisible by ALPHA=(A) and hence
// it will always be off by one.
a2++;
}
*/
return a2;
}
/*************************************************
* Name:        PQCLEAN_DILITHIUM2_CLEAN_decompose
*
* Description: Split a into a high part a1 and a low part a0 with
*              a = a1*ALPHA + a0, where ALPHA = 2*GAMMA2, except that
*              when a - a0 equals Q-1 the high part is set to 0 and a0
*              is decremented by 1. The remainder and the quotient are
*              obtained with a Barrett-style multiply-and-shift instead
*              of a division, and all selections are done with 0/1
*              masks rather than branches.
*              The input is not reduced mod Q here; the code assumes
*              0 <= a < Q, and the constant R assumes ALPHA = 190464.
*
* Arguments:   - int32_t *a0: pointer to output low part a0
*              - int32_t a: input element
*
* Returns a1.
**************************************************/
int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a) {
    /* TODO:
       a % Q is skipped, as it seems a<Q always. In case this
       needs to be done, then we can use the fact that Q is a
       Generalized Mersenne Prime, so modular reduction is fast
       (see work by Jerome Solinas and Crandall '92 algo).
    */

    // Barrett parameters: floor(x / A) is approximated by (x * R) >> M.
    static const uint32_t M = 36;
    // Precomputed reciprocal R = floor(2^36 / 190464).
    static const uint32_t R = 360800;
    // As per spec ALPHA, A = 2*GAMMA2 = (Q-1)/88 * 2 (Dilithium2).
    static const uint32_t A = 2 * GAMMA2;

    int32_t r;
    int32_t v, w, z;

    // Low part, first estimate: r = a - A*floor((a*R) / 2^M).
    // R is rounded down, so the quotient estimate may be one too small
    // and r may end up slightly above A.
    r = (int32_t)((((uint64_t)a) * R) >> M);
    r = a - r * A;

    // 0/1 flags taken from the sign bit of each difference (for r >= 0):
    //   v = 1 when r > A, w = 1 when r > GAMMA2, z = 1 when r > A + GAMMA2.
    v = ((A - r) >> 31) & 1;
    w = ((GAMMA2 - r) >> 31) & 1;
    z = (((A + GAMMA2) - r) >> 31) & 1;

    // Centre the remainder: subtract 2*A when z is set, otherwise
    // subtract A when v or w is set, otherwise leave r unchanged.
    *a0 = r - (((!z) & (v | w)) * A) - (z) * 2 * A;

    // High part before division: a - a0.
    uint64_t a2 = a - *a0;

    // Wrap-around case: a - a0 == Q-1  =>  a1 = 0 and a0 = a0 - 1.
    v = ((int32_t)a2 - Q + 1);
    *a0 -= !v;
    a2 = (!!v) * a2;

    // Divide by A. For a2 >= A the multiply-and-shift result is
    // incremented by one (R is rounded down, so an exact multiple of A
    // comes out one too low); for a2 < A the value is kept as is.
    v = ((int32_t)a2 - A) >> 31;
    a2 = (!v) * (((a2 * R) >> M) + 1) + v * a2;

    return (int32_t)a2;
}