Patch summary (free text ahead of the first "diff --git" header; patch tools skip it):
  * CMakeLists.txt: adds install() rules for the pqclean / pqclean_s targets and
    for the headers listed in ${QRS_PUBLIC_INC}.
  * dilithium2/clean/rounding.c: the previous decompose body is reworked into
    PQCLEAN_DILITHIUM2_CLEAN_decompose_ORG (older branchy code kept in comments /
    #if 0), and a new PQCLEAN_DILITHIUM2_CLEAN_decompose is added that contains
    the same mask-based statements without the commented-out code.
NOTE(review): the install() PERMISSIONS list no longer contains GROUP_WRITE or
  WORLD_WRITE, so installed libraries are not group/world-writable.
NOTE(review): this copy was recovered from a capture that collapsed line breaks;
  indentation below is reconstructed. The two lines starting with
  "a % Q is skipped, as it seems a" are damaged (text apparently lost between a
  '<' and the following '>') and are reproduced as found. The line counts in the
  second hunk header could not be confirmed -- regenerate the patch from the
  repository before applying it.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b66b286f..e85bfa28 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -555,6 +555,14 @@ target_link_libraries(
   pqclean_dilithium5_clean
 )
 
+install(TARGETS pqclean pqclean_s
+  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib)
+install(FILES
+  ${QRS_PUBLIC_INC}
+  DESTINATION include/pqclean)
+
 # TODO: this requires changes to testvectors.c
 # add_executable(
 #   test
diff --git a/src/sign/dilithium/dilithium2/clean/rounding.c b/src/sign/dilithium/dilithium2/clean/rounding.c
index 21376539..b2e5a1d5 100644
--- a/src/sign/dilithium/dilithium2/clean/rounding.c
+++ b/src/sign/dilithium/dilithium2/clean/rounding.c
@@ -38,66 +38,126 @@ int32_t PQCLEAN_DILITHIUM2_CLEAN_power2round(int32_t *a0, int32_t a) {
 *
 * Returns a1.
 **************************************************/
-int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a) {
-    int32_t a1 = 0;
-    uint64_t r;
-
-    int32_t r0, r1;
+int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose_ORG(int32_t *a0, int32_t a) {
+    /* TODO:
+    a % Q is skipped, as it seems a0); assert(a>= 36;
-    r *= 2 * GAMMA2;
-    r = a - r;
-
-    if (r>(2*GAMMA2)) {
-        r -= 2*GAMMA2;
+    // Use Barrett reduction to calculate r0 = r % A. The
+    // code calculates:
+    // floor(a/A) = floor( (a * R) / 2^M)
+    // where,
+    // M is so that 2^M>= A^2
+    // r = floor(2^M / A)
+    static const uint32_t M = 36;
+    // Precomputed reciprocal r = floor((2^36) / 190464)
+    static const uint32_t R = 360800;
+    // As per spec ALPHA, A = 2*GAMMA2 = (Q-1)/88 * 2 (Dilithium2)
+    static const uint32_t A = 2*GAMMA2;
+
+    // a0
+    int32_t r;
+    int32_t v,w,z;
+    // Barrett reduction:
+    // a0' = a mod A = a - A*floor((a*r) / 2^M)
+    r = (int32_t)((((uint64_t)a)*R) >> M);
+    r = a - r*A;
+
+    v = ((A-r)>>31) & 1;
+    w = ((GAMMA2 - r)>>31) & 1;
+    z = (((A + GAMMA2) -r) >> 31) & 1;
+// printf("%d %d %d\n", v,w,z);
+    *a0 = r - (((!z)&(v|w))*A) - (z)*2*A;
+
+    /*
+    // REDC
+    if (r>(int32_t)A) {
+        r -= A;
     }
-    r1 = ((int32_t)r)*2*GAMMA2;
-    // centrize
-    if (r > GAMMA2) {
-        *a0 = (int32_t)r - 2*GAMMA2;
+    if (r > (int32_t)GAMMA2) {
+        *a0 = (int32_t)r - A;
     } else {
         *a0 = r;
     }
+    */
+    // a1
+    uint64_t a2 = a - *a0;
 
-    // OLD
-    a1 = (a + 127) >> 7;
-    a1 = (a1 * 11275 + (1 << 23)) >> 24;
-    a1 ^= ((43 - a1) >> 31) & a1;
-
+    v = ((int32_t)a2 - Q + 1);
+    //a2 = (!v);
+    *a0 -= !v;
+    a2 = (!!v)*a2;
     // CASE: r-r0 = q-1 => r1=0, r0 = r0-1
-    uint64_t a2 = (uint64_t)a - *a0;
+    #if 0
     if (a2 == (Q-1)) {
         a2 = 0;
-        *a0--;
+        *a0 = *a0-1;
     }
-
-    // divide (r-r0)/alpha
-    // int32_t a2 = ((uint64_t)a-*a0)/(2*GAMMA2);
-    if ( (a2 >= (2*GAMMA2))) {
-        a2 = (a2*u) >> 36;
-        // a2 is divisible by ALPHA=(2*GAMMA2) and hence
+    #endif
+
+    // divide (r-r0)/A
+    // int32_t a2 = ((uint64_t)a-*a0)/(A);
+    v = ((int32_t)a2-A) >> 31;
+    a2 = (!v)*(((a2*R) >> M) + 1) + v*a2;
+    /*
+    if ( (a2 >= (A))) {
+        a2 = (a2*R) >> M;
+        // a2 is divisible by ALPHA=(A) and hence
         // it will always be off by one.
         a2++;
     }
+    */
+    return a2;
+}
 
-
-
-    //if (!a1) a2 = a1;
-
-    //*a0 = a - a1 * 2 * GAMMA2;
-    //*a0 -= (((Q - 1) / 2 - *a0) >> 31) & Q;
-    if (a1 != (int32_t)a2)
-        printf("OZAPTF: (A1=%d, A2=%d, A=%d R=%d)\n",
-            a1, (int32_t)a2, a, (a-(*a0)));
-// printf("OZAPTF: %d %d %d\n", a, *a0, (a-*a0));
+int32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(int32_t *a0, int32_t a) {
+    /* TODO:
+    a % Q is skipped, as it seems a= A^2
+    // r = floor(2^M / A)
+    static const uint32_t M = 36;
+    // Precomputed reciprocal r = floor((2^36) / 190464)
+    static const uint32_t R = 360800;
+    // As per spec ALPHA, A = 2*GAMMA2 = (Q-1)/88 * 2 (Dilithium2)
+    static const uint32_t A = 2*GAMMA2;
+
+    // a0
+    int32_t r;
+    int32_t v,w,z;
+    // Barrett reduction:
+    // a0' = a mod A = a - A*floor((a*r) / 2^M)
+    r = (int32_t)((((uint64_t)a)*R) >> M);
+    r = a - r*A;
+
+    v = ((A-r)>>31) & 1;
+    w = ((GAMMA2 - r)>>31) & 1;
+    z = (((A + GAMMA2) -r) >> 31) & 1;
+    *a0 = r - (((!z)&(v|w))*A) - (z)*2*A;
+
+    // a1
+    uint64_t a2 = a - *a0;
+    v = ((int32_t)a2 - Q + 1);
+    *a0 -= !v;
+    a2 = (!!v)*a2;
+    v = ((int32_t)a2-A) >> 31;
+    a2 = (!v)*(((a2*R) >> M) + 1) + v*a2;
     return a2;
 }