diff --git a/crypto_kem/ntruhps2048509/META.yml b/crypto_kem/ntruhps2048509/META.yml index cf2f3632..bb2d543c 100644 --- a/crypto_kem/ntruhps2048509/META.yml +++ b/crypto_kem/ntruhps2048509/META.yml @@ -20,4 +20,4 @@ auxiliary-submitters: - Zhenfei Zhang implementations: - name: clean - version: https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-2/submissions/NTRU-Round2.zip reference implemntation + version: https://github.com/jschanck/ntru/tree/485dde03 reference implementation diff --git a/crypto_kem/ntruhps2048509/clean/Makefile b/crypto_kem/ntruhps2048509/clean/Makefile index e8b20e47..6d8334f5 100644 --- a/crypto_kem/ntruhps2048509/clean/Makefile +++ b/crypto_kem/ntruhps2048509/clean/Makefile @@ -2,7 +2,7 @@ LIB=libntruhps2048509_clean.a HEADERS=api.h crypto_sort.h owcpa.h params.h poly.h sample.h verify.h -OBJECTS=crypto_sort.o kem.o owcpa.o pack3.o packq.o poly.o sample.o verify.o +OBJECTS=crypto_sort.o kem.o owcpa.o pack3.o packq.o poly.o poly_lift.o poly_mod.o poly_r2_inv.o poly_rq_mul.o poly_s3_inv.o sample.o sample_iid.o verify.o CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror -Wredundant-decls -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS) diff --git a/crypto_kem/ntruhps2048509/clean/Makefile.Microsoft_nmake b/crypto_kem/ntruhps2048509/clean/Makefile.Microsoft_nmake index 7dd29cb6..cf072b31 100644 --- a/crypto_kem/ntruhps2048509/clean/Makefile.Microsoft_nmake +++ b/crypto_kem/ntruhps2048509/clean/Makefile.Microsoft_nmake @@ -2,7 +2,7 @@ # nmake /f Makefile.Microsoft_nmake LIBRARY=libntruhps2048509_clean.lib -OBJECTS=crypto_sort.obj kem.obj owcpa.obj pack3.obj packq.obj poly.obj sample.obj verify.obj +OBJECTS=crypto_sort.obj kem.obj owcpa.obj pack3.obj packq.obj poly.obj poly_lift.obj poly_mod.obj poly_r2_inv.obj poly_rq_mul.obj poly_s3_inv.obj sample.obj sample_iid.obj verify.obj CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX diff --git a/crypto_kem/ntruhps2048509/clean/kem.c b/crypto_kem/ntruhps2048509/clean/kem.c index 97351508..c4934cb5 100644 --- a/crypto_kem/ntruhps2048509/clean/kem.c +++ b/crypto_kem/ntruhps2048509/clean/kem.c @@ -5,6 +5,7 @@ #include "owcpa.h" #include "params.h" #include "randombytes.h" +#include "sample.h" #include "verify.h" // API FUNCTIONS @@ -20,15 +21,20 @@ int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { } int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) { + poly r, m; uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES]; randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES); - PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_samplemsg(rm, rm_seed); + PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(&r, &m, rm_seed); + + PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm, &r); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m); sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); - PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(c, rm, pk); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(&r); + PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(c, &r, &m, pk); return 0; } @@ -38,9 +44,14 @@ int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, co uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES]; - fail = PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(rm, c, sk); - /* If fail = 0 then c = Enc(h, rm), there is no need to re-encapsulate. */ - /* See comment in PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec for details. */ + fail = 0; + + /* Check that unused bits of last byte of ciphertext are zero */ + fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)))); + + fail |= PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(rm, c, sk); + /* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */ + /* See comment in PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec for details. */ sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); diff --git a/crypto_kem/ntruhps2048509/clean/owcpa.c b/crypto_kem/ntruhps2048509/clean/owcpa.c index 5b0b2c2d..31c2e02c 100644 --- a/crypto_kem/ntruhps2048509/clean/owcpa.c +++ b/crypto_kem/ntruhps2048509/clean/owcpa.c @@ -13,7 +13,7 @@ static int owcpa_check_r(const poly *r) { t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */ t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */ } - t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */ + t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */ t = (~t + 1); // two's complement t >>= 63; return (int) t; @@ -38,16 +38,6 @@ static int owcpa_check_m(const poly *m) { return (int) t; } -void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], - const unsigned char seed[NTRU_SAMPLE_RM_BYTES]) { - poly r, m; - - PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(&r, &m, seed); - - PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(msg, &r); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(msg + NTRU_PACK_TRINARY_BYTES, &m); -} - void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk, unsigned char *sk, const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) { @@ -55,9 +45,8 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk, poly x1, x2, x3, x4, x5; - poly *f = &x1, *invf_mod3 = &x2; - poly *g = &x3, *G = &x2; - poly *Gf = &x3, *invGf = &x4, *tmp = &x5; + poly *f = &x1, *g = &x2, *invf_mod3 = &x3; + poly *gf = &x3, *invgf = &x4, *tmp = &x5; poly *invh = &x3, *h = &x3; PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(f, g, seed); @@ -70,45 +59,41 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk, PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(f); PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(g); - /* G = 3*g */ + /* g = 3*g */ for (i = 0; i < NTRU_N; i++) { - G->coeffs[i] = MODQ(3 * g->coeffs[i]); + g->coeffs[i] = 3 * g->coeffs[i]; } - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(Gf, G, f); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(gf, g, f); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(invGf, Gf); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(invgf, gf); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invGf, f); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invgf, f); PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(invh, tmp, f); PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invGf, G); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(h, tmp, G); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invgf, g); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(h, tmp, g); PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(pk, h); } void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk) { int i; - poly x1, x2, x3; + poly x1, x2; poly *h = &x1, *liftm = &x1; - poly *r = &x2, *m = &x2; - poly *ct = &x3; + poly *ct = &x2; PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(h, pk); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(r, rm); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(r); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(ct, r, h); - PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(m, rm + NTRU_PACK_TRINARY_BYTES); PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - ct->coeffs[i] = MODQ(ct->coeffs[i] + liftm->coeffs[i]); + ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i]; } PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(c, ct); @@ -147,7 +132,7 @@ int PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(unsigned char *rm, /* b = c - Lift(m) mod (q, x^n - 1) */ PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - b->coeffs[i] = MODQ(c->coeffs[i] - liftm->coeffs[i]); + b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i]; } /* r = b / h mod (q, Phi_n) */ diff --git a/crypto_kem/ntruhps2048509/clean/owcpa.h b/crypto_kem/ntruhps2048509/clean/owcpa.h index d18c28e7..a910b6c1 100644 --- a/crypto_kem/ntruhps2048509/clean/owcpa.h +++ b/crypto_kem/ntruhps2048509/clean/owcpa.h @@ -2,6 +2,7 @@ #define OWCPA_H #include "params.h" +#include "poly.h" void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], const unsigned char seed[NTRU_SEEDBYTES]); @@ -11,7 +12,8 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk, const unsigned char seed[NTRU_SEEDBYTES]); void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk); int PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(unsigned char *rm, diff --git a/crypto_kem/ntruhps2048509/clean/pack3.c b/crypto_kem/ntruhps2048509/clean/pack3.c index a5be6758..cf1873fb 100644 --- a/crypto_kem/ntruhps2048509/clean/pack3.c +++ b/crypto_kem/ntruhps2048509/clean/pack3.c @@ -13,8 +13,6 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_M c = (3 * c + a->coeffs[5 * i + 0]) & 255; msg[i] = c; } - - // if ((NTRU_N - 1) % 5 != 0) i = NTRU_PACK_DEG / 5; c = 0; for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) { @@ -31,21 +29,19 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(poly *r, const unsigned char for (i = 0; i < NTRU_PACK_DEG / 5; i++) { c = msg[i]; - r->coeffs[5 * i + 0] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(c); - r->coeffs[5 * i + 1] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(c * 171 >> 9); // this is division by 3 - r->coeffs[5 * i + 2] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(c * 57 >> 9); // division by 3^2 - r->coeffs[5 * i + 3] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(c * 19 >> 9); // division by 3^3 - r->coeffs[5 * i + 4] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(c * 203 >> 14); // etc. + r->coeffs[5 * i + 0] = c; + r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3 + r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2 + r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3 + r->coeffs[5 * i + 4] = c * 203 >> 14; // etc. } - - // if ((NTRU_N - 1) % 5 != 0) i = NTRU_PACK_DEG / 5; c = msg[i]; for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) { - r->coeffs[5 * i + j] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(c); + r->coeffs[5 * i + j] = c; c = c * 171 >> 9; } - r->coeffs[NTRU_N - 1] = 0; + PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r); } diff --git a/crypto_kem/ntruhps2048509/clean/packq.c b/crypto_kem/ntruhps2048509/clean/packq.c index 9aa97b79..9f99167b 100644 --- a/crypto_kem/ntruhps2048509/clean/packq.c +++ b/crypto_kem/ntruhps2048509/clean/packq.c @@ -6,7 +6,7 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly * for (i = 0; i < NTRU_PACK_DEG / 8; i++) { for (j = 0; j < 8; j++) { - t[j] = a->coeffs[8 * i + j]; + t[j] = MODQ(a->coeffs[8 * i + j]); } r[11 * i + 0] = (unsigned char) ( t[0] & 0xff); @@ -23,27 +23,29 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly * } for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) { - t[j] = a->coeffs[8 * i + j]; + t[j] = MODQ(a->coeffs[8 * i + j]); } for (; j < 8; j++) { t[j] = 0; } - switch (NTRU_PACK_DEG - 8 * (NTRU_PACK_DEG / 8)) { - case 6: - r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2)); - r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff); - r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7)); - // fallthrough + + switch (NTRU_PACK_DEG & 0x07) { + // cases 0 and 6 are impossible since 2 generates (Z/n)* and + // p mod 8 in {1, 7} implies that 2 is a quadratic residue. case 4: - r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4)); - r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1)); - r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff); - // fallthrough + r[11 * i + 0] = (unsigned char) (t[0] & 0xff); + r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); + r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); + r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff; + r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1); + r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4); + break; case 2: - r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6)); - r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3)); - r[11 * i + 0] = (unsigned char) ( t[0] & 0xff); + r[11 * i + 0] = (unsigned char) (t[0] & 0xff); + r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); + r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); + break; } } @@ -59,19 +61,22 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6); r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3); } - switch (NTRU_PACK_DEG - 8 * (NTRU_PACK_DEG / 8)) { - case 6: - r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9); - r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4); - // fallthrough + + switch (NTRU_PACK_DEG & 0x07) { + // cases 0 and 6 are impossible since 2 generates (Z/n)* and + // p mod 8 in {1, 7} implies that 2 is a quadratic residue. case 4: - r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); - r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); - // fallthrough + r->coeffs[8 * i + 0] = (unsigned char) (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); + r->coeffs[8 * i + 1] = (unsigned char) (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); + r->coeffs[8 * i + 2] = (unsigned char) (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); + r->coeffs[8 * i + 3] = (unsigned char) (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); + break; case 2: - r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); - r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); + r->coeffs[8 * i + 0] = (unsigned char) (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); + r->coeffs[8 * i + 1] = (unsigned char) (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); + break; } + r->coeffs[NTRU_N - 1] = 0; } void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) { @@ -85,7 +90,6 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsi /* Set r[n-1] so that the sum of coefficients is zero mod q */ r->coeffs[NTRU_N - 1] = 0; for (i = 0; i < NTRU_PACK_DEG; i++) { - r->coeffs[NTRU_N - 1] += r->coeffs[i]; + r->coeffs[NTRU_N - 1] -= r->coeffs[i]; } - r->coeffs[NTRU_N - 1] = MODQ(-(r->coeffs[NTRU_N - 1])); } diff --git a/crypto_kem/ntruhps2048509/clean/params.h b/crypto_kem/ntruhps2048509/clean/params.h index 49642b0b..42354594 100644 --- a/crypto_kem/ntruhps2048509/clean/params.h +++ b/crypto_kem/ntruhps2048509/clean/params.h @@ -7,6 +7,8 @@ /* Do not modify below this line */ +#define PAD32(X) ((((X) + 31)/32)*32) + #define NTRU_Q (1 << NTRU_LOGQ) #define NTRU_WEIGHT (NTRU_Q/8 - 2) diff --git a/crypto_kem/ntruhps2048509/clean/poly.c b/crypto_kem/ntruhps2048509/clean/poly.c index d74bc902..38cb67f0 100644 --- a/crypto_kem/ntruhps2048509/clean/poly.c +++ b/crypto_kem/ntruhps2048509/clean/poly.c @@ -1,21 +1,4 @@ #include "poly.h" -#include "fips202.h" -#include "verify.h" - -uint16_t PQCLEAN_NTRUHPS2048509_CLEAN_mod3(uint16_t a) { - uint16_t r; - int16_t t, c; - - r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 - r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - - t = r - 3; - c = t >> 15; - - return (c & r) ^ (~c & t); -} /* Map {0, 1, 2} -> {0,1,q-1} in place */ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(poly *r) { @@ -29,197 +12,22 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(poly *r) { void PQCLEAN_NTRUHPS2048509_CLEAN_poly_trinary_Zq_to_Z3(poly *r) { int i; for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = MODQ(r->coeffs[i]); r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1))); } } -void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - r->coeffs[k] = MODQ(r->coeffs[k]); - } -} - void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b) { - int i; PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, a, b); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = MODQ(r->coeffs[i] - r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(r); } void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - } - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(r->coeffs[k] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, a, b); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r); } -void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a) { - int i; - uint16_t last_coeff = a->coeffs[NTRU_N - 1]; - - for (i = NTRU_N - 1; i > 0; i--) { - r->coeffs[i] = MODQ(a->coeffs[i - 1] + (NTRU_Q - a->coeffs[i])); - } - r->coeffs[0] = MODQ(last_coeff + (NTRU_Q - a->coeffs[0])); -} - -void PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(poly *r, const poly *a) { - int i; - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = a->coeffs[i]; - } - PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(r); -} - -void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { - /* NOTE: Assumes input is in [0,Q-1]^N */ - /* Produces output in {0,1,2}^N */ - int i; - - /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = ((a->coeffs[i] >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; - r->coeffs[i] += a->coeffs[i]; - } - /* Reduce mod (3, Phi) */ - r->coeffs[NTRU_N - 1] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(r->coeffs[NTRU_N - 1]); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } -} - -#define POLY_R2_ADD(I,A,B,S) \ - for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; - a->coeffs[i] ^= t; - b->coeffs[i] ^= t; - } -} - -static inline void poly_divx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); - } - a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); -} - -static inline void poly_mulx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); - } - a->coeffs[0] = (!s * a->coeffs[0]); -} - -static void poly_R2_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - int k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, t, swap; - int16_t done = 0; - poly b, f, g; - poly *c = r; // save some stack space - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c->coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i] & 1; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = f.coeffs[0]; - swap = sign & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - POLY_R2_ADD(i, f, g, sign * (!done)); - POLY_R2_ADD(i, b, (*c), sign * (!done)); - - poly_divx(&f, !done); - poly_mulx(c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = b.coeffs[i]; - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHPS2048509_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } -} - -static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { +static void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { int i; poly b, c; @@ -228,7 +36,7 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { // for 0..4 // ai = ai * (2 - a*ai) mod q for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = MODQ(NTRU_Q - a->coeffs[i]); // b = -a + b.coeffs[i] = -(a->coeffs[i]); } for (i = 0; i < NTRU_N; i++) { @@ -254,92 +62,6 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(poly *r, const poly *a) { poly ai2; - poly_R2_inv(&ai2, a); - poly_R2_inv_to_Rq_inv(r, &ai2, a); -} - -void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - uint16_t k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, fsign = 0, t, swap; - int16_t done = 0; - poly b, c, f, g; - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c.coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i]; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(2 * g.coeffs[0] * f.coeffs[0]); - swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, &c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); - } - for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); - } - - poly_divx(&f, !done); - poly_mulx(&c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - fsign = f.coeffs[0]; - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3((uint16_t) fsign * b.coeffs[i]); - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHPS2048509_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } - - /* Reduce modulo Phi_n */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(&ai2, a); + PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv_to_Rq_inv(r, &ai2, a); } diff --git a/crypto_kem/ntruhps2048509/clean/poly.h b/crypto_kem/ntruhps2048509/clean/poly.h index 276b99d6..d6045cc7 100644 --- a/crypto_kem/ntruhps2048509/clean/poly.h +++ b/crypto_kem/ntruhps2048509/clean/poly.h @@ -6,12 +6,16 @@ #include "params.h" #define MODQ(X) ((X) & (NTRU_Q-1)) -uint16_t PQCLEAN_NTRUHPS2048509_CLEAN_mod3(uint16_t a); typedef struct { + // round to nearest multiple of 32 to make it easier to load into vector + // registers without having to do bound checks +#define NTRU_N_32 PAD32(NTRU_N) uint16_t coeffs[NTRU_N]; } poly; +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(poly *r); +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(poly *r); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a); @@ -24,11 +28,11 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(poly *r, const unsigned char void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b); -void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a); +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a); diff --git a/crypto_kem/ntruhps2048509/clean/poly_lift.c b/crypto_kem/ntruhps2048509/clean/poly_lift.c new file mode 100644 index 00000000..dfceb6ed --- /dev/null +++ b/crypto_kem/ntruhps2048509/clean/poly_lift.c @@ -0,0 +1,10 @@ +#include "poly.h" + +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(poly *r, const poly *a) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = a->coeffs[i]; + } + PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(r); +} + diff --git a/crypto_kem/ntruhps2048509/clean/poly_mod.c b/crypto_kem/ntruhps2048509/clean/poly_mod.c new file mode 100644 index 00000000..02ab2543 --- /dev/null +++ b/crypto_kem/ntruhps2048509/clean/poly_mod.c @@ -0,0 +1,45 @@ +#include "poly.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} + +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = r->coeffs[i] - r->coeffs[NTRU_N - 1]; + } +} + +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { + /* NOTE: Assumes input is in [0,Q-1]^N */ + /* Produces output in {0,1,2}^N */ + int i; + + /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; + r->coeffs[i] += MODQ(a->coeffs[i]); + } + + PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r); +} + diff --git a/crypto_kem/ntruhps2048509/clean/poly_r2_inv.c b/crypto_kem/ntruhps2048509/clean/poly_r2_inv.c new file mode 100644 index 00000000..e7d327d2 --- /dev/null +++ b/crypto_kem/ntruhps2048509/clean/poly_r2_inv.c @@ -0,0 +1,113 @@ +#include "poly.h" +#include "verify.h" + +#define POLY_R2_ADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + int k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, t, swap; + int16_t done = 0; + poly b, f, g; + poly *c = r; // save some stack space + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c->coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i] & 1; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = f.coeffs[0]; + swap = sign & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + POLY_R2_ADD(i, f, g, sign * (!done)); + POLY_R2_ADD(i, b, (*c), sign * (!done)); + + poly_divx(&f, !done); + poly_mulx(c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = b.coeffs[i]; + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHPS2048509_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } +} diff --git a/crypto_kem/ntruhps2048509/clean/poly_rq_mul.c b/crypto_kem/ntruhps2048509/clean/poly_rq_mul.c new file mode 100644 index 00000000..44ba5422 --- /dev/null +++ b/crypto_kem/ntruhps2048509/clean/poly_rq_mul.c @@ -0,0 +1,15 @@ +#include "poly.h" + +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { + int k, i; + + for (k = 0; k < NTRU_N; k++) { + r->coeffs[k] = 0; + for (i = 1; i < NTRU_N - k; i++) { + r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; + } + for (i = 0; i < k + 1; i++) { + r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; + } + } +} diff --git a/crypto_kem/ntruhps2048509/clean/poly_s3_inv.c b/crypto_kem/ntruhps2048509/clean/poly_s3_inv.c new file mode 100644 index 00000000..8fb5a21f --- /dev/null +++ b/crypto_kem/ntruhps2048509/clean/poly_s3_inv.c @@ -0,0 +1,137 @@ +#include "poly.h" +#include "verify.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +#define POLY_S3_FMADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + uint16_t k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, fsign = 0, t, swap; + int16_t done = 0; + poly b, c, f, g; + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c.coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i]; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = mod3(2 * g.coeffs[0] * f.coeffs[0]); + swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, &c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); + } + for (i = 0; i < NTRU_N; i++) { + b.coeffs[i] = mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); + } + + poly_divx(&f, !done); + poly_mulx(&c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + fsign = f.coeffs[0]; + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3((uint16_t) fsign * b.coeffs[i]); + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHPS2048509_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } + + /* Reduce modulo Phi_n */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} diff --git a/crypto_kem/ntruhps2048509/clean/sample.c b/crypto_kem/ntruhps2048509/clean/sample.c index c4fc9709..da829217 100644 --- a/crypto_kem/ntruhps2048509/clean/sample.c +++ b/crypto_kem/ntruhps2048509/clean/sample.c @@ -1,5 +1,5 @@ +#include "crypto_sort.h" #include "sample.h" -#include "fips202.h" void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) { PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(f, uniformbytes); @@ -11,17 +11,6 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(poly *r, poly *m, const unsigned cha PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(m, uniformbytes + NTRU_SAMPLE_IID_BYTES); } -void PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { - int i; - /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ - for (i = 0; i < NTRU_N - 1; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048509_CLEAN_mod3(uniformbytes[i]); - } - - r->coeffs[NTRU_N - 1] = 0; -} - -#include "crypto_sort.h" void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(poly *r, const unsigned char u[NTRU_SAMPLE_FT_BYTES]) { // Assumes NTRU_SAMPLE_FT_BYTES = ceil(30*(n-1)/8) diff --git a/crypto_kem/ntruhps2048509/clean/sample_iid.c b/crypto_kem/ntruhps2048509/clean/sample_iid.c new file mode 100644 index 00000000..ab5d921b --- /dev/null +++ b/crypto_kem/ntruhps2048509/clean/sample_iid.c @@ -0,0 +1,26 @@ +#include "sample.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { + int i; + /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ + for (i = 0; i < NTRU_N - 1; i++) { + r->coeffs[i] = mod3(uniformbytes[i]); + } + + r->coeffs[NTRU_N - 1] = 0; +} diff --git a/crypto_kem/ntruhps2048677/META.yml b/crypto_kem/ntruhps2048677/META.yml index 1e44307f..392668ff 100644 --- a/crypto_kem/ntruhps2048677/META.yml +++ b/crypto_kem/ntruhps2048677/META.yml @@ -20,4 +20,4 @@ auxiliary-submitters: - Zhenfei Zhang implementations: - name: clean - version: https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-2/submissions/NTRU-Round2.zip reference implemntation + version: https://github.com/jschanck/ntru/tree/485dde03 reference implementation diff --git a/crypto_kem/ntruhps2048677/clean/Makefile b/crypto_kem/ntruhps2048677/clean/Makefile index 7e892038..f65abed9 100644 --- a/crypto_kem/ntruhps2048677/clean/Makefile +++ b/crypto_kem/ntruhps2048677/clean/Makefile @@ -2,7 +2,7 @@ LIB=libntruhps2048677_clean.a HEADERS=api.h crypto_sort.h owcpa.h params.h poly.h sample.h verify.h -OBJECTS=crypto_sort.o kem.o owcpa.o pack3.o packq.o poly.o sample.o verify.o +OBJECTS=crypto_sort.o kem.o owcpa.o pack3.o packq.o poly.o poly_lift.o poly_mod.o poly_r2_inv.o poly_rq_mul.o poly_s3_inv.o sample.o sample_iid.o verify.o CFLAGS=-O3 -Wall -Wextra -Wpedantic -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS) diff --git a/crypto_kem/ntruhps2048677/clean/Makefile.Microsoft_nmake b/crypto_kem/ntruhps2048677/clean/Makefile.Microsoft_nmake index d06e80a7..2205be99 100644 --- a/crypto_kem/ntruhps2048677/clean/Makefile.Microsoft_nmake +++ b/crypto_kem/ntruhps2048677/clean/Makefile.Microsoft_nmake @@ -2,7 +2,7 @@ # nmake /f Makefile.Microsoft_nmake LIBRARY=libntruhps2048677_clean.lib -OBJECTS=crypto_sort.obj kem.obj owcpa.obj pack3.obj packq.obj poly.obj sample.obj verify.obj +OBJECTS=crypto_sort.obj kem.obj owcpa.obj pack3.obj packq.obj poly.obj poly_lift.obj poly_mod.obj poly_r2_inv.obj poly_rq_mul.obj poly_s3_inv.obj sample.obj sample_iid.obj verify.obj CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX diff --git a/crypto_kem/ntruhps2048677/clean/kem.c b/crypto_kem/ntruhps2048677/clean/kem.c index d3ff7a15..0d12da94 100644 --- a/crypto_kem/ntruhps2048677/clean/kem.c +++ b/crypto_kem/ntruhps2048677/clean/kem.c @@ -5,6 +5,7 @@ #include "owcpa.h" #include "params.h" #include "randombytes.h" +#include "sample.h" #include "verify.h" // API FUNCTIONS @@ -20,15 +21,20 @@ int PQCLEAN_NTRUHPS2048677_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { } int PQCLEAN_NTRUHPS2048677_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) { + poly r, m; uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES]; randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES); - PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_samplemsg(rm, rm_seed); + PQCLEAN_NTRUHPS2048677_CLEAN_sample_rm(&r, &m, rm_seed); + + PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_tobytes(rm, &r); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m); sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); - PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_enc(c, rm, pk); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(&r); + PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_enc(c, &r, &m, pk); return 0; } @@ -38,9 +44,14 @@ int PQCLEAN_NTRUHPS2048677_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, co uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES]; - fail = PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec(rm, c, sk); - /* If fail = 0 then c = Enc(h, rm), there is no need to re-encapsulate. */ - /* See comment in PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec for details. */ + fail = 0; + + /* Check that unused bits of last byte of ciphertext are zero */ + fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)))); + + fail |= PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec(rm, c, sk); + /* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */ + /* See comment in PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec for details. */ sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); diff --git a/crypto_kem/ntruhps2048677/clean/owcpa.c b/crypto_kem/ntruhps2048677/clean/owcpa.c index aaa02b85..e1b5eda2 100644 --- a/crypto_kem/ntruhps2048677/clean/owcpa.c +++ b/crypto_kem/ntruhps2048677/clean/owcpa.c @@ -13,7 +13,7 @@ static int owcpa_check_r(const poly *r) { t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */ t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */ } - t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */ + t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */ t = (~t + 1); // two's complement t >>= 63; return (int) t; @@ -38,16 +38,6 @@ static int owcpa_check_m(const poly *m) { return (int) t; } -void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], - const unsigned char seed[NTRU_SAMPLE_RM_BYTES]) { - poly r, m; - - PQCLEAN_NTRUHPS2048677_CLEAN_sample_rm(&r, &m, seed); - - PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_tobytes(msg, &r); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_tobytes(msg + NTRU_PACK_TRINARY_BYTES, &m); -} - void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_keypair(unsigned char *pk, unsigned char *sk, const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) { @@ -55,9 +45,8 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_keypair(unsigned char *pk, poly x1, x2, x3, x4, x5; - poly *f = &x1, *invf_mod3 = &x2; - poly *g = &x3, *G = &x2; - poly *Gf = &x3, *invGf = &x4, *tmp = &x5; + poly *f = &x1, *g = &x2, *invf_mod3 = &x3; + poly *gf = &x3, *invgf = &x4, *tmp = &x5; poly *invh = &x3, *h = &x3; PQCLEAN_NTRUHPS2048677_CLEAN_sample_fg(f, g, seed); @@ -70,45 +59,41 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_keypair(unsigned char *pk, PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(f); PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(g); - /* G = 3*g */ + /* g = 3*g */ for (i = 0; i < NTRU_N; i++) { - G->coeffs[i] = MODQ(3 * g->coeffs[i]); + g->coeffs[i] = 3 * g->coeffs[i]; } - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(Gf, G, f); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(gf, g, f); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_inv(invGf, Gf); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_inv(invgf, gf); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(tmp, invGf, f); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(tmp, invgf, f); PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_mul(invh, tmp, f); PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(tmp, invGf, G); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(h, tmp, G); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(tmp, invgf, g); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(h, tmp, g); PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_sum_zero_tobytes(pk, h); } void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk) { int i; - poly x1, x2, x3; + poly x1, x2; poly *h = &x1, *liftm = &x1; - poly *r = &x2, *m = &x2; - poly *ct = &x3; + poly *ct = &x2; PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_sum_zero_frombytes(h, pk); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_frombytes(r, rm); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(r); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(ct, r, h); - PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_frombytes(m, rm + NTRU_PACK_TRINARY_BYTES); PQCLEAN_NTRUHPS2048677_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - ct->coeffs[i] = MODQ(ct->coeffs[i] + liftm->coeffs[i]); + ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i]; } PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_sum_zero_tobytes(c, ct); @@ -147,7 +132,7 @@ int PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec(unsigned char *rm, /* b = c - Lift(m) mod (q, x^n - 1) */ PQCLEAN_NTRUHPS2048677_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - b->coeffs[i] = MODQ(c->coeffs[i] - liftm->coeffs[i]); + b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i]; } /* r = b / h mod (q, Phi_n) */ diff --git a/crypto_kem/ntruhps2048677/clean/owcpa.h b/crypto_kem/ntruhps2048677/clean/owcpa.h index 6d63915d..1f936c03 100644 --- a/crypto_kem/ntruhps2048677/clean/owcpa.h +++ b/crypto_kem/ntruhps2048677/clean/owcpa.h @@ -2,6 +2,7 @@ #define OWCPA_H #include "params.h" +#include "poly.h" void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], const unsigned char seed[NTRU_SEEDBYTES]); @@ -11,7 +12,8 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_keypair(unsigned char *pk, const unsigned char seed[NTRU_SEEDBYTES]); void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk); int PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec(unsigned char *rm, diff --git a/crypto_kem/ntruhps2048677/clean/pack3.c b/crypto_kem/ntruhps2048677/clean/pack3.c index 121fd44b..8ae65ee1 100644 --- a/crypto_kem/ntruhps2048677/clean/pack3.c +++ b/crypto_kem/ntruhps2048677/clean/pack3.c @@ -13,8 +13,6 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_M c = (3 * c + a->coeffs[5 * i + 0]) & 255; msg[i] = c; } - - // if ((NTRU_N - 1) % 5 != 0) i = NTRU_PACK_DEG / 5; c = 0; for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) { @@ -31,21 +29,19 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_frombytes(poly *r, const unsigned char for (i = 0; i < NTRU_PACK_DEG / 5; i++) { c = msg[i]; - r->coeffs[5 * i + 0] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(c); - r->coeffs[5 * i + 1] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(c * 171 >> 9); // this is division by 3 - r->coeffs[5 * i + 2] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(c * 57 >> 9); // division by 3^2 - r->coeffs[5 * i + 3] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(c * 19 >> 9); // division by 3^3 - r->coeffs[5 * i + 4] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(c * 203 >> 14); // etc. + r->coeffs[5 * i + 0] = c; + r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3 + r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2 + r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3 + r->coeffs[5 * i + 4] = c * 203 >> 14; // etc. } - - // if ((NTRU_N - 1) % 5 != 0) i = NTRU_PACK_DEG / 5; c = msg[i]; for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) { - r->coeffs[5 * i + j] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(c); + r->coeffs[5 * i + j] = c; c = c * 171 >> 9; } - r->coeffs[NTRU_N - 1] = 0; + PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_3_Phi_n(r); } diff --git a/crypto_kem/ntruhps2048677/clean/packq.c b/crypto_kem/ntruhps2048677/clean/packq.c index 759b6b16..ede8ec09 100644 --- a/crypto_kem/ntruhps2048677/clean/packq.c +++ b/crypto_kem/ntruhps2048677/clean/packq.c @@ -6,7 +6,7 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly * for (i = 0; i < NTRU_PACK_DEG / 8; i++) { for (j = 0; j < 8; j++) { - t[j] = a->coeffs[8 * i + j]; + t[j] = MODQ(a->coeffs[8 * i + j]); } r[11 * i + 0] = (unsigned char) ( t[0] & 0xff); @@ -23,27 +23,29 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly * } for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) { - t[j] = a->coeffs[8 * i + j]; + t[j] = MODQ(a->coeffs[8 * i + j]); } for (; j < 8; j++) { t[j] = 0; } - switch (NTRU_PACK_DEG - 8 * (NTRU_PACK_DEG / 8)) { - case 6: - r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2)); - r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff); - r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7)); - // fallthrough + + switch (NTRU_PACK_DEG & 0x07) { + // cases 0 and 6 are impossible since 2 generates (Z/n)* and + // p mod 8 in {1, 7} implies that 2 is a quadratic residue. case 4: - r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4)); - r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1)); - r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff); - // fallthrough + r[11 * i + 0] = (unsigned char) (t[0] & 0xff); + r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); + r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); + r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff; + r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1); + r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4); + break; case 2: - r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6)); - r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3)); - r[11 * i + 0] = (unsigned char) ( t[0] & 0xff); + r[11 * i + 0] = (unsigned char) (t[0] & 0xff); + r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); + r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); + break; } } @@ -59,19 +61,22 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6); r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3); } - switch (NTRU_PACK_DEG - 8 * (NTRU_PACK_DEG / 8)) { - case 6: - r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9); - r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4); - // fallthrough + + switch (NTRU_PACK_DEG & 0x07) { + // cases 0 and 6 are impossible since 2 generates (Z/n)* and + // p mod 8 in {1, 7} implies that 2 is a quadratic residue. case 4: - r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); - r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); - // fallthrough + r->coeffs[8 * i + 0] = (unsigned char) (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); + r->coeffs[8 * i + 1] = (unsigned char) (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); + r->coeffs[8 * i + 2] = (unsigned char) (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); + r->coeffs[8 * i + 3] = (unsigned char) (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); + break; case 2: - r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); - r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); + r->coeffs[8 * i + 0] = (unsigned char) (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); + r->coeffs[8 * i + 1] = (unsigned char) (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); + break; } + r->coeffs[NTRU_N - 1] = 0; } void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) { @@ -85,7 +90,6 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsi /* Set r[n-1] so that the sum of coefficients is zero mod q */ r->coeffs[NTRU_N - 1] = 0; for (i = 0; i < NTRU_PACK_DEG; i++) { - r->coeffs[NTRU_N - 1] += r->coeffs[i]; + r->coeffs[NTRU_N - 1] -= r->coeffs[i]; } - r->coeffs[NTRU_N - 1] = MODQ(-(r->coeffs[NTRU_N - 1])); } diff --git a/crypto_kem/ntruhps2048677/clean/params.h b/crypto_kem/ntruhps2048677/clean/params.h index c11f6f12..2fab214a 100644 --- a/crypto_kem/ntruhps2048677/clean/params.h +++ b/crypto_kem/ntruhps2048677/clean/params.h @@ -7,6 +7,8 @@ /* Do not modify below this line */ +#define PAD32(X) ((((X) + 31)/32)*32) + #define NTRU_Q (1 << NTRU_LOGQ) #define NTRU_WEIGHT (NTRU_Q/8 - 2) diff --git a/crypto_kem/ntruhps2048677/clean/poly.c b/crypto_kem/ntruhps2048677/clean/poly.c index 60ae348c..a44c21df 100644 --- a/crypto_kem/ntruhps2048677/clean/poly.c +++ b/crypto_kem/ntruhps2048677/clean/poly.c @@ -1,21 +1,4 @@ #include "poly.h" -#include "fips202.h" -#include "verify.h" - -uint16_t PQCLEAN_NTRUHPS2048677_CLEAN_mod3(uint16_t a) { - uint16_t r; - int16_t t, c; - - r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 - r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - - t = r - 3; - c = t >> 15; - - return (c & r) ^ (~c & t); -} /* Map {0, 1, 2} -> {0,1,q-1} in place */ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(poly *r) { @@ -29,197 +12,22 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(poly *r) { void PQCLEAN_NTRUHPS2048677_CLEAN_poly_trinary_Zq_to_Z3(poly *r) { int i; for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = MODQ(r->coeffs[i]); r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1))); } } -void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - r->coeffs[k] = MODQ(r->coeffs[k]); - } -} - void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b) { - int i; PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(r, a, b); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = MODQ(r->coeffs[i] - r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_q_Phi_n(r); } void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - } - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(r->coeffs[k] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(r, a, b); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_3_Phi_n(r); } -void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a) { - int i; - uint16_t last_coeff = a->coeffs[NTRU_N - 1]; - - for (i = NTRU_N - 1; i > 0; i--) { - r->coeffs[i] = MODQ(a->coeffs[i - 1] + (NTRU_Q - a->coeffs[i])); - } - r->coeffs[0] = MODQ(last_coeff + (NTRU_Q - a->coeffs[0])); -} - -void PQCLEAN_NTRUHPS2048677_CLEAN_poly_lift(poly *r, const poly *a) { - int i; - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = a->coeffs[i]; - } - PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(r); -} - -void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { - /* NOTE: Assumes input is in [0,Q-1]^N */ - /* Produces output in {0,1,2}^N */ - int i; - - /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = ((a->coeffs[i] >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; - r->coeffs[i] += a->coeffs[i]; - } - /* Reduce mod (3, Phi) */ - r->coeffs[NTRU_N - 1] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(r->coeffs[NTRU_N - 1]); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } -} - -#define POLY_R2_ADD(I,A,B,S) \ - for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; - a->coeffs[i] ^= t; - b->coeffs[i] ^= t; - } -} - -static inline void poly_divx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); - } - a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); -} - -static inline void poly_mulx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); - } - a->coeffs[0] = (!s * a->coeffs[0]); -} - -static void poly_R2_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - int k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, t, swap; - int16_t done = 0; - poly b, f, g; - poly *c = r; // save some stack space - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c->coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i] & 1; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = f.coeffs[0]; - swap = sign & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - POLY_R2_ADD(i, f, g, sign * (!done)); - POLY_R2_ADD(i, b, (*c), sign * (!done)); - - poly_divx(&f, !done); - poly_mulx(c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = b.coeffs[i]; - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHPS2048677_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } -} - -static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { +static void PQCLEAN_NTRUHPS2048677_CLEAN_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { int i; poly b, c; @@ -228,7 +36,7 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { // for 0..4 // ai = ai * (2 - a*ai) mod q for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = MODQ(NTRU_Q - a->coeffs[i]); // b = -a + b.coeffs[i] = -(a->coeffs[i]); } for (i = 0; i < NTRU_N; i++) { @@ -254,92 +62,6 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_inv(poly *r, const poly *a) { poly ai2; - poly_R2_inv(&ai2, a); - poly_R2_inv_to_Rq_inv(r, &ai2, a); -} - -void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - uint16_t k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, fsign = 0, t, swap; - int16_t done = 0; - poly b, c, f, g; - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c.coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i]; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(2 * g.coeffs[0] * f.coeffs[0]); - swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, &c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); - } - for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); - } - - poly_divx(&f, !done); - poly_mulx(&c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - fsign = f.coeffs[0]; - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3((uint16_t) fsign * b.coeffs[i]); - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHPS2048677_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } - - /* Reduce modulo Phi_n */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS2048677_CLEAN_poly_R2_inv(&ai2, a); + PQCLEAN_NTRUHPS2048677_CLEAN_poly_R2_inv_to_Rq_inv(r, &ai2, a); } diff --git a/crypto_kem/ntruhps2048677/clean/poly.h b/crypto_kem/ntruhps2048677/clean/poly.h index 0402282a..54876069 100644 --- a/crypto_kem/ntruhps2048677/clean/poly.h +++ b/crypto_kem/ntruhps2048677/clean/poly.h @@ -6,12 +6,16 @@ #include "params.h" #define MODQ(X) ((X) & (NTRU_Q-1)) -uint16_t PQCLEAN_NTRUHPS2048677_CLEAN_mod3(uint16_t a); typedef struct { + // round to nearest multiple of 32 to make it easier to load into vector + // registers without having to do bound checks +#define NTRU_N_32 PAD32(NTRU_N) uint16_t coeffs[NTRU_N]; } poly; +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_3_Phi_n(poly *r); +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_q_Phi_n(poly *r); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a); @@ -24,11 +28,11 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_frombytes(poly *r, const unsigned char void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b); -void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_lift(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_to_S3(poly *r, const poly *a); +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_R2_inv(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_inv(poly *r, const poly *a); void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_inv(poly *r, const poly *a); diff --git a/crypto_kem/ntruhps2048677/clean/poly_lift.c b/crypto_kem/ntruhps2048677/clean/poly_lift.c new file mode 100644 index 00000000..b49109e4 --- /dev/null +++ b/crypto_kem/ntruhps2048677/clean/poly_lift.c @@ -0,0 +1,10 @@ +#include "poly.h" + +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_lift(poly *r, const poly *a) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = a->coeffs[i]; + } + PQCLEAN_NTRUHPS2048677_CLEAN_poly_Z3_to_Zq(r); +} + diff --git a/crypto_kem/ntruhps2048677/clean/poly_mod.c b/crypto_kem/ntruhps2048677/clean/poly_mod.c new file mode 100644 index 00000000..5bd675e7 --- /dev/null +++ b/crypto_kem/ntruhps2048677/clean/poly_mod.c @@ -0,0 +1,45 @@ +#include "poly.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_3_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} + +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_q_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = r->coeffs[i] - r->coeffs[NTRU_N - 1]; + } +} + +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { + /* NOTE: Assumes input is in [0,Q-1]^N */ + /* Produces output in {0,1,2}^N */ + int i; + + /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; + r->coeffs[i] += MODQ(a->coeffs[i]); + } + + PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_3_Phi_n(r); +} + diff --git a/crypto_kem/ntruhps2048677/clean/poly_r2_inv.c b/crypto_kem/ntruhps2048677/clean/poly_r2_inv.c new file mode 100644 index 00000000..124fbaaa --- /dev/null +++ b/crypto_kem/ntruhps2048677/clean/poly_r2_inv.c @@ -0,0 +1,113 @@ +#include "poly.h" +#include "verify.h" + +#define POLY_R2_ADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_R2_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + int k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, t, swap; + int16_t done = 0; + poly b, f, g; + poly *c = r; // save some stack space + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c->coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i] & 1; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = f.coeffs[0]; + swap = sign & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + POLY_R2_ADD(i, f, g, sign * (!done)); + POLY_R2_ADD(i, b, (*c), sign * (!done)); + + poly_divx(&f, !done); + poly_mulx(c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = b.coeffs[i]; + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHPS2048677_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } +} diff --git a/crypto_kem/ntruhps2048677/clean/poly_rq_mul.c b/crypto_kem/ntruhps2048677/clean/poly_rq_mul.c new file mode 100644 index 00000000..3bc3cc9e --- /dev/null +++ b/crypto_kem/ntruhps2048677/clean/poly_rq_mul.c @@ -0,0 +1,15 @@ +#include "poly.h" + +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { + int k, i; + + for (k = 0; k < NTRU_N; k++) { + r->coeffs[k] = 0; + for (i = 1; i < NTRU_N - k; i++) { + r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; + } + for (i = 0; i < k + 1; i++) { + r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; + } + } +} diff --git a/crypto_kem/ntruhps2048677/clean/poly_s3_inv.c b/crypto_kem/ntruhps2048677/clean/poly_s3_inv.c new file mode 100644 index 00000000..d935eb0e --- /dev/null +++ b/crypto_kem/ntruhps2048677/clean/poly_s3_inv.c @@ -0,0 +1,137 @@ +#include "poly.h" +#include "verify.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +#define POLY_S3_FMADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + uint16_t k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, fsign = 0, t, swap; + int16_t done = 0; + poly b, c, f, g; + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c.coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i]; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = mod3(2 * g.coeffs[0] * f.coeffs[0]); + swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, &c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); + } + for (i = 0; i < NTRU_N; i++) { + b.coeffs[i] = mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); + } + + poly_divx(&f, !done); + poly_mulx(&c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + fsign = f.coeffs[0]; + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3((uint16_t) fsign * b.coeffs[i]); + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHPS2048677_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } + + /* Reduce modulo Phi_n */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} diff --git a/crypto_kem/ntruhps2048677/clean/sample.c b/crypto_kem/ntruhps2048677/clean/sample.c index 7cc893ad..1ad42233 100644 --- a/crypto_kem/ntruhps2048677/clean/sample.c +++ b/crypto_kem/ntruhps2048677/clean/sample.c @@ -1,5 +1,5 @@ +#include "crypto_sort.h" #include "sample.h" -#include "fips202.h" void PQCLEAN_NTRUHPS2048677_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) { PQCLEAN_NTRUHPS2048677_CLEAN_sample_iid(f, uniformbytes); @@ -11,17 +11,6 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_sample_rm(poly *r, poly *m, const unsigned cha PQCLEAN_NTRUHPS2048677_CLEAN_sample_fixed_type(m, uniformbytes + NTRU_SAMPLE_IID_BYTES); } -void PQCLEAN_NTRUHPS2048677_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { - int i; - /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ - for (i = 0; i < NTRU_N - 1; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS2048677_CLEAN_mod3(uniformbytes[i]); - } - - r->coeffs[NTRU_N - 1] = 0; -} - -#include "crypto_sort.h" void PQCLEAN_NTRUHPS2048677_CLEAN_sample_fixed_type(poly *r, const unsigned char u[NTRU_SAMPLE_FT_BYTES]) { // Assumes NTRU_SAMPLE_FT_BYTES = ceil(30*(n-1)/8) diff --git a/crypto_kem/ntruhps2048677/clean/sample_iid.c b/crypto_kem/ntruhps2048677/clean/sample_iid.c new file mode 100644 index 00000000..59e3f0f2 --- /dev/null +++ b/crypto_kem/ntruhps2048677/clean/sample_iid.c @@ -0,0 +1,26 @@ +#include "sample.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHPS2048677_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { + int i; + /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ + for (i = 0; i < NTRU_N - 1; i++) { + r->coeffs[i] = mod3(uniformbytes[i]); + } + + r->coeffs[NTRU_N - 1] = 0; +} diff --git a/crypto_kem/ntruhps4096821/META.yml b/crypto_kem/ntruhps4096821/META.yml index 778b3cbe..7ad64ded 100644 --- a/crypto_kem/ntruhps4096821/META.yml +++ b/crypto_kem/ntruhps4096821/META.yml @@ -20,4 +20,4 @@ auxiliary-submitters: - Zhenfei Zhang implementations: - name: clean - version: https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-2/submissions/NTRU-Round2.zip reference implemntation + version: https://github.com/jschanck/ntru/tree/485dde03 reference implementation diff --git a/crypto_kem/ntruhps4096821/clean/Makefile b/crypto_kem/ntruhps4096821/clean/Makefile index 281e99dd..27bf5bdf 100644 --- a/crypto_kem/ntruhps4096821/clean/Makefile +++ b/crypto_kem/ntruhps4096821/clean/Makefile @@ -2,7 +2,7 @@ LIB=libntruhps4096821_clean.a HEADERS=api.h crypto_sort.h owcpa.h params.h poly.h sample.h verify.h -OBJECTS=crypto_sort.o kem.o owcpa.o pack3.o packq.o poly.o sample.o verify.o +OBJECTS=crypto_sort.o kem.o owcpa.o pack3.o packq.o poly.o poly_lift.o poly_mod.o poly_r2_inv.o poly_rq_mul.o poly_s3_inv.o sample.o sample_iid.o verify.o CFLAGS=-O3 -Wall -Wextra -Wpedantic -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS) diff --git a/crypto_kem/ntruhps4096821/clean/Makefile.Microsoft_nmake b/crypto_kem/ntruhps4096821/clean/Makefile.Microsoft_nmake index 642620b9..91002600 100644 --- a/crypto_kem/ntruhps4096821/clean/Makefile.Microsoft_nmake +++ b/crypto_kem/ntruhps4096821/clean/Makefile.Microsoft_nmake @@ -2,7 +2,7 @@ # nmake /f Makefile.Microsoft_nmake LIBRARY=libntruhps4096821_clean.lib -OBJECTS=crypto_sort.obj kem.obj owcpa.obj pack3.obj packq.obj poly.obj sample.obj verify.obj +OBJECTS=crypto_sort.obj kem.obj owcpa.obj pack3.obj packq.obj poly.obj poly_lift.obj poly_mod.obj poly_r2_inv.obj poly_rq_mul.obj poly_s3_inv.obj sample.obj sample_iid.obj verify.obj CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX diff --git a/crypto_kem/ntruhps4096821/clean/kem.c b/crypto_kem/ntruhps4096821/clean/kem.c index b1f0941c..252475a7 100644 --- a/crypto_kem/ntruhps4096821/clean/kem.c +++ b/crypto_kem/ntruhps4096821/clean/kem.c @@ -5,6 +5,7 @@ #include "owcpa.h" #include "params.h" #include "randombytes.h" +#include "sample.h" #include "verify.h" // API FUNCTIONS @@ -20,15 +21,20 @@ int PQCLEAN_NTRUHPS4096821_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { } int PQCLEAN_NTRUHPS4096821_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) { + poly r, m; uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES]; randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES); - PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_samplemsg(rm, rm_seed); + PQCLEAN_NTRUHPS4096821_CLEAN_sample_rm(&r, &m, rm_seed); + + PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_tobytes(rm, &r); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m); sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); - PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_enc(c, rm, pk); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(&r); + PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_enc(c, &r, &m, pk); return 0; } @@ -38,9 +44,14 @@ int PQCLEAN_NTRUHPS4096821_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, co uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES]; - fail = PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec(rm, c, sk); - /* If fail = 0 then c = Enc(h, rm), there is no need to re-encapsulate. */ - /* See comment in PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec for details. */ + fail = 0; + + /* Check that unused bits of last byte of ciphertext are zero */ + fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)))); + + fail |= PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec(rm, c, sk); + /* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */ + /* See comment in PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec for details. */ sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); diff --git a/crypto_kem/ntruhps4096821/clean/owcpa.c b/crypto_kem/ntruhps4096821/clean/owcpa.c index 29845252..abd9b07f 100644 --- a/crypto_kem/ntruhps4096821/clean/owcpa.c +++ b/crypto_kem/ntruhps4096821/clean/owcpa.c @@ -13,7 +13,7 @@ static int owcpa_check_r(const poly *r) { t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */ t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */ } - t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */ + t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */ t = (~t + 1); // two's complement t >>= 63; return (int) t; @@ -38,16 +38,6 @@ static int owcpa_check_m(const poly *m) { return (int) t; } -void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], - const unsigned char seed[NTRU_SAMPLE_RM_BYTES]) { - poly r, m; - - PQCLEAN_NTRUHPS4096821_CLEAN_sample_rm(&r, &m, seed); - - PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_tobytes(msg, &r); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_tobytes(msg + NTRU_PACK_TRINARY_BYTES, &m); -} - void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_keypair(unsigned char *pk, unsigned char *sk, const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) { @@ -55,9 +45,8 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_keypair(unsigned char *pk, poly x1, x2, x3, x4, x5; - poly *f = &x1, *invf_mod3 = &x2; - poly *g = &x3, *G = &x2; - poly *Gf = &x3, *invGf = &x4, *tmp = &x5; + poly *f = &x1, *g = &x2, *invf_mod3 = &x3; + poly *gf = &x3, *invgf = &x4, *tmp = &x5; poly *invh = &x3, *h = &x3; PQCLEAN_NTRUHPS4096821_CLEAN_sample_fg(f, g, seed); @@ -70,45 +59,41 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_keypair(unsigned char *pk, PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(f); PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(g); - /* G = 3*g */ + /* g = 3*g */ for (i = 0; i < NTRU_N; i++) { - G->coeffs[i] = MODQ(3 * g->coeffs[i]); + g->coeffs[i] = 3 * g->coeffs[i]; } - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(Gf, G, f); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(gf, g, f); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_inv(invGf, Gf); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_inv(invgf, gf); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(tmp, invGf, f); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(tmp, invgf, f); PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_mul(invh, tmp, f); PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(tmp, invGf, G); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(h, tmp, G); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(tmp, invgf, g); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(h, tmp, g); PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_sum_zero_tobytes(pk, h); } void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk) { int i; - poly x1, x2, x3; + poly x1, x2; poly *h = &x1, *liftm = &x1; - poly *r = &x2, *m = &x2; - poly *ct = &x3; + poly *ct = &x2; PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_sum_zero_frombytes(h, pk); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_frombytes(r, rm); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(r); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(ct, r, h); - PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_frombytes(m, rm + NTRU_PACK_TRINARY_BYTES); PQCLEAN_NTRUHPS4096821_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - ct->coeffs[i] = MODQ(ct->coeffs[i] + liftm->coeffs[i]); + ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i]; } PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_sum_zero_tobytes(c, ct); @@ -147,7 +132,7 @@ int PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec(unsigned char *rm, /* b = c - Lift(m) mod (q, x^n - 1) */ PQCLEAN_NTRUHPS4096821_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - b->coeffs[i] = MODQ(c->coeffs[i] - liftm->coeffs[i]); + b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i]; } /* r = b / h mod (q, Phi_n) */ diff --git a/crypto_kem/ntruhps4096821/clean/owcpa.h b/crypto_kem/ntruhps4096821/clean/owcpa.h index c0997575..b4a4dd63 100644 --- a/crypto_kem/ntruhps4096821/clean/owcpa.h +++ b/crypto_kem/ntruhps4096821/clean/owcpa.h @@ -2,6 +2,7 @@ #define OWCPA_H #include "params.h" +#include "poly.h" void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], const unsigned char seed[NTRU_SEEDBYTES]); @@ -11,7 +12,8 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_keypair(unsigned char *pk, const unsigned char seed[NTRU_SEEDBYTES]); void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk); int PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec(unsigned char *rm, diff --git a/crypto_kem/ntruhps4096821/clean/pack3.c b/crypto_kem/ntruhps4096821/clean/pack3.c index 6bfed9a8..6b7ffe02 100644 --- a/crypto_kem/ntruhps4096821/clean/pack3.c +++ b/crypto_kem/ntruhps4096821/clean/pack3.c @@ -21,12 +21,13 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_frombytes(poly *r, const unsigned char for (i = 0; i < NTRU_PACK_DEG / 5; i++) { c = msg[i]; - r->coeffs[5 * i + 0] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(c); - r->coeffs[5 * i + 1] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(c * 171 >> 9); // this is division by 3 - r->coeffs[5 * i + 2] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(c * 57 >> 9); // division by 3^2 - r->coeffs[5 * i + 3] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(c * 19 >> 9); // division by 3^3 - r->coeffs[5 * i + 4] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(c * 203 >> 14); // etc. + r->coeffs[5 * i + 0] = c; + r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3 + r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2 + r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3 + r->coeffs[5 * i + 4] = c * 203 >> 14; // etc. } r->coeffs[NTRU_N - 1] = 0; + PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_3_Phi_n(r); } diff --git a/crypto_kem/ntruhps4096821/clean/packq.c b/crypto_kem/ntruhps4096821/clean/packq.c index c0ffe4cc..ce966192 100644 --- a/crypto_kem/ntruhps4096821/clean/packq.c +++ b/crypto_kem/ntruhps4096821/clean/packq.c @@ -5,9 +5,9 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly * int i; for (i = 0; i < NTRU_PACK_DEG / 2; i++) { - r[3 * i + 0] = (unsigned char) ( a->coeffs[2 * i + 0] & 0xff); - r[3 * i + 1] = (unsigned char) ((a->coeffs[2 * i + 0] >> 8) | ((a->coeffs[2 * i + 1] & 0x0f) << 4)); - r[3 * i + 2] = (unsigned char) ((a->coeffs[2 * i + 1] >> 4)); + r[3 * i + 0] = (unsigned char) ( MODQ(a->coeffs[2 * i + 0]) & 0xff); + r[3 * i + 1] = (unsigned char) ((MODQ(a->coeffs[2 * i + 0]) >> 8) | ((MODQ(a->coeffs[2 * i + 1]) & 0x0f) << 4)); + r[3 * i + 2] = (unsigned char) ((MODQ(a->coeffs[2 * i + 1]) >> 4)); } } @@ -17,6 +17,7 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char r->coeffs[2 * i + 0] = (a[3 * i + 0] >> 0) | (((uint16_t)a[3 * i + 1] & 0x0f) << 8); r->coeffs[2 * i + 1] = (a[3 * i + 1] >> 4) | (((uint16_t)a[3 * i + 2] & 0xff) << 4); } + r->coeffs[NTRU_N - 1] = 0; } void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) { @@ -30,7 +31,6 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsi /* Set r[n-1] so that the sum of coefficients is zero mod q */ r->coeffs[NTRU_N - 1] = 0; for (i = 0; i < NTRU_PACK_DEG; i++) { - r->coeffs[NTRU_N - 1] += r->coeffs[i]; + r->coeffs[NTRU_N - 1] -= r->coeffs[i]; } - r->coeffs[NTRU_N - 1] = MODQ(-(r->coeffs[NTRU_N - 1])); } diff --git a/crypto_kem/ntruhps4096821/clean/params.h b/crypto_kem/ntruhps4096821/clean/params.h index 18e8b1d7..313938c6 100644 --- a/crypto_kem/ntruhps4096821/clean/params.h +++ b/crypto_kem/ntruhps4096821/clean/params.h @@ -7,6 +7,8 @@ /* Do not modify below this line */ +#define PAD32(X) ((((X) + 31)/32)*32) + #define NTRU_Q (1 << NTRU_LOGQ) #define NTRU_WEIGHT (NTRU_Q/8 - 2) diff --git a/crypto_kem/ntruhps4096821/clean/poly.c b/crypto_kem/ntruhps4096821/clean/poly.c index bda3c857..6cf32fed 100644 --- a/crypto_kem/ntruhps4096821/clean/poly.c +++ b/crypto_kem/ntruhps4096821/clean/poly.c @@ -1,21 +1,4 @@ #include "poly.h" -#include "fips202.h" -#include "verify.h" - -uint16_t PQCLEAN_NTRUHPS4096821_CLEAN_mod3(uint16_t a) { - uint16_t r; - int16_t t, c; - - r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 - r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - - t = r - 3; - c = t >> 15; - - return (c & r) ^ (~c & t); -} /* Map {0, 1, 2} -> {0,1,q-1} in place */ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(poly *r) { @@ -29,197 +12,22 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(poly *r) { void PQCLEAN_NTRUHPS4096821_CLEAN_poly_trinary_Zq_to_Z3(poly *r) { int i; for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = MODQ(r->coeffs[i]); r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1))); } } -void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - r->coeffs[k] = MODQ(r->coeffs[k]); - } -} - void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b) { - int i; PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(r, a, b); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = MODQ(r->coeffs[i] - r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_q_Phi_n(r); } void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - } - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(r->coeffs[k] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(r, a, b); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_3_Phi_n(r); } -void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a) { - int i; - uint16_t last_coeff = a->coeffs[NTRU_N - 1]; - - for (i = NTRU_N - 1; i > 0; i--) { - r->coeffs[i] = MODQ(a->coeffs[i - 1] + (NTRU_Q - a->coeffs[i])); - } - r->coeffs[0] = MODQ(last_coeff + (NTRU_Q - a->coeffs[0])); -} - -void PQCLEAN_NTRUHPS4096821_CLEAN_poly_lift(poly *r, const poly *a) { - int i; - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = a->coeffs[i]; - } - PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(r); -} - -void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { - /* NOTE: Assumes input is in [0,Q-1]^N */ - /* Produces output in {0,1,2}^N */ - int i; - - /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = ((a->coeffs[i] >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; - r->coeffs[i] += a->coeffs[i]; - } - /* Reduce mod (3, Phi) */ - r->coeffs[NTRU_N - 1] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(r->coeffs[NTRU_N - 1]); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } -} - -#define POLY_R2_ADD(I,A,B,S) \ - for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; - a->coeffs[i] ^= t; - b->coeffs[i] ^= t; - } -} - -static inline void poly_divx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); - } - a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); -} - -static inline void poly_mulx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); - } - a->coeffs[0] = (!s * a->coeffs[0]); -} - -static void poly_R2_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - int k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, t, swap; - int16_t done = 0; - poly b, f, g; - poly *c = r; // save some stack space - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c->coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i] & 1; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = f.coeffs[0]; - swap = sign & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - POLY_R2_ADD(i, f, g, sign * (!done)); - POLY_R2_ADD(i, b, (*c), sign * (!done)); - - poly_divx(&f, !done); - poly_mulx(c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = b.coeffs[i]; - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHPS4096821_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } -} - -static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { +static void PQCLEAN_NTRUHPS4096821_CLEAN_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { int i; poly b, c; @@ -228,7 +36,7 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { // for 0..4 // ai = ai * (2 - a*ai) mod q for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = MODQ(NTRU_Q - a->coeffs[i]); // b = -a + b.coeffs[i] = -(a->coeffs[i]); } for (i = 0; i < NTRU_N; i++) { @@ -254,92 +62,6 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_inv(poly *r, const poly *a) { poly ai2; - poly_R2_inv(&ai2, a); - poly_R2_inv_to_Rq_inv(r, &ai2, a); -} - -void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - uint16_t k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, fsign = 0, t, swap; - int16_t done = 0; - poly b, c, f, g; - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c.coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i]; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(2 * g.coeffs[0] * f.coeffs[0]); - swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, &c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); - } - for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); - } - - poly_divx(&f, !done); - poly_mulx(&c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - fsign = f.coeffs[0]; - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3((uint16_t) fsign * b.coeffs[i]); - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHPS4096821_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } - - /* Reduce modulo Phi_n */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHPS4096821_CLEAN_poly_R2_inv(&ai2, a); + PQCLEAN_NTRUHPS4096821_CLEAN_poly_R2_inv_to_Rq_inv(r, &ai2, a); } diff --git a/crypto_kem/ntruhps4096821/clean/poly.h b/crypto_kem/ntruhps4096821/clean/poly.h index d7b2dc85..615ee21d 100644 --- a/crypto_kem/ntruhps4096821/clean/poly.h +++ b/crypto_kem/ntruhps4096821/clean/poly.h @@ -6,12 +6,16 @@ #include "params.h" #define MODQ(X) ((X) & (NTRU_Q-1)) -uint16_t PQCLEAN_NTRUHPS4096821_CLEAN_mod3(uint16_t a); typedef struct { + // round to nearest multiple of 32 to make it easier to load into vector + // registers without having to do bound checks +#define NTRU_N_32 PAD32(NTRU_N) uint16_t coeffs[NTRU_N]; } poly; +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_3_Phi_n(poly *r); +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_q_Phi_n(poly *r); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a); @@ -24,11 +28,11 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_frombytes(poly *r, const unsigned char void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b); -void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_lift(poly *r, const poly *a); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_to_S3(poly *r, const poly *a); +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_R2_inv(poly *r, const poly *a); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_inv(poly *r, const poly *a); void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_inv(poly *r, const poly *a); diff --git a/crypto_kem/ntruhps4096821/clean/poly_lift.c b/crypto_kem/ntruhps4096821/clean/poly_lift.c new file mode 100644 index 00000000..6cf524f0 --- /dev/null +++ b/crypto_kem/ntruhps4096821/clean/poly_lift.c @@ -0,0 +1,10 @@ +#include "poly.h" + +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_lift(poly *r, const poly *a) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = a->coeffs[i]; + } + PQCLEAN_NTRUHPS4096821_CLEAN_poly_Z3_to_Zq(r); +} + diff --git a/crypto_kem/ntruhps4096821/clean/poly_mod.c b/crypto_kem/ntruhps4096821/clean/poly_mod.c new file mode 100644 index 00000000..7daa9953 --- /dev/null +++ b/crypto_kem/ntruhps4096821/clean/poly_mod.c @@ -0,0 +1,45 @@ +#include "poly.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_3_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} + +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_q_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = r->coeffs[i] - r->coeffs[NTRU_N - 1]; + } +} + +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { + /* NOTE: Assumes input is in [0,Q-1]^N */ + /* Produces output in {0,1,2}^N */ + int i; + + /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; + r->coeffs[i] += MODQ(a->coeffs[i]); + } + + PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_3_Phi_n(r); +} + diff --git a/crypto_kem/ntruhps4096821/clean/poly_r2_inv.c b/crypto_kem/ntruhps4096821/clean/poly_r2_inv.c new file mode 100644 index 00000000..eb35645a --- /dev/null +++ b/crypto_kem/ntruhps4096821/clean/poly_r2_inv.c @@ -0,0 +1,113 @@ +#include "poly.h" +#include "verify.h" + +#define POLY_R2_ADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_R2_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + int k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, t, swap; + int16_t done = 0; + poly b, f, g; + poly *c = r; // save some stack space + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c->coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i] & 1; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = f.coeffs[0]; + swap = sign & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + POLY_R2_ADD(i, f, g, sign * (!done)); + POLY_R2_ADD(i, b, (*c), sign * (!done)); + + poly_divx(&f, !done); + poly_mulx(c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = b.coeffs[i]; + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHPS4096821_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } +} diff --git a/crypto_kem/ntruhps4096821/clean/poly_rq_mul.c b/crypto_kem/ntruhps4096821/clean/poly_rq_mul.c new file mode 100644 index 00000000..e9f11e91 --- /dev/null +++ b/crypto_kem/ntruhps4096821/clean/poly_rq_mul.c @@ -0,0 +1,15 @@ +#include "poly.h" + +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { + int k, i; + + for (k = 0; k < NTRU_N; k++) { + r->coeffs[k] = 0; + for (i = 1; i < NTRU_N - k; i++) { + r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; + } + for (i = 0; i < k + 1; i++) { + r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; + } + } +} diff --git a/crypto_kem/ntruhps4096821/clean/poly_s3_inv.c b/crypto_kem/ntruhps4096821/clean/poly_s3_inv.c new file mode 100644 index 00000000..de54adc4 --- /dev/null +++ b/crypto_kem/ntruhps4096821/clean/poly_s3_inv.c @@ -0,0 +1,137 @@ +#include "poly.h" +#include "verify.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +#define POLY_S3_FMADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + uint16_t k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, fsign = 0, t, swap; + int16_t done = 0; + poly b, c, f, g; + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c.coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i]; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = mod3(2 * g.coeffs[0] * f.coeffs[0]); + swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, &c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); + } + for (i = 0; i < NTRU_N; i++) { + b.coeffs[i] = mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); + } + + poly_divx(&f, !done); + poly_mulx(&c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + fsign = f.coeffs[0]; + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3((uint16_t) fsign * b.coeffs[i]); + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHPS4096821_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } + + /* Reduce modulo Phi_n */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} diff --git a/crypto_kem/ntruhps4096821/clean/sample.c b/crypto_kem/ntruhps4096821/clean/sample.c index f0409663..40d33546 100644 --- a/crypto_kem/ntruhps4096821/clean/sample.c +++ b/crypto_kem/ntruhps4096821/clean/sample.c @@ -1,5 +1,5 @@ +#include "crypto_sort.h" #include "sample.h" -#include "fips202.h" void PQCLEAN_NTRUHPS4096821_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) { PQCLEAN_NTRUHPS4096821_CLEAN_sample_iid(f, uniformbytes); @@ -11,17 +11,6 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_sample_rm(poly *r, poly *m, const unsigned cha PQCLEAN_NTRUHPS4096821_CLEAN_sample_fixed_type(m, uniformbytes + NTRU_SAMPLE_IID_BYTES); } -void PQCLEAN_NTRUHPS4096821_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { - int i; - /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ - for (i = 0; i < NTRU_N - 1; i++) { - r->coeffs[i] = PQCLEAN_NTRUHPS4096821_CLEAN_mod3(uniformbytes[i]); - } - - r->coeffs[NTRU_N - 1] = 0; -} - -#include "crypto_sort.h" void PQCLEAN_NTRUHPS4096821_CLEAN_sample_fixed_type(poly *r, const unsigned char u[NTRU_SAMPLE_FT_BYTES]) { // Assumes NTRU_SAMPLE_FT_BYTES = ceil(30*(n-1)/8) diff --git a/crypto_kem/ntruhps4096821/clean/sample_iid.c b/crypto_kem/ntruhps4096821/clean/sample_iid.c new file mode 100644 index 00000000..de0827a2 --- /dev/null +++ b/crypto_kem/ntruhps4096821/clean/sample_iid.c @@ -0,0 +1,26 @@ +#include "sample.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHPS4096821_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { + int i; + /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ + for (i = 0; i < NTRU_N - 1; i++) { + r->coeffs[i] = mod3(uniformbytes[i]); + } + + r->coeffs[NTRU_N - 1] = 0; +} diff --git a/crypto_kem/ntruhrss701/META.yml b/crypto_kem/ntruhrss701/META.yml index d6352d1b..7d3d6271 100644 --- a/crypto_kem/ntruhrss701/META.yml +++ b/crypto_kem/ntruhrss701/META.yml @@ -20,4 +20,4 @@ auxiliary-submitters: - Zhenfei Zhang implementations: - name: clean - version: https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-2/submissions/NTRU-Round2.zip reference implemntation + version: https://github.com/jschanck/ntru/tree/485dde03 reference implementation diff --git a/crypto_kem/ntruhrss701/clean/Makefile b/crypto_kem/ntruhrss701/clean/Makefile index 62328bc3..ca8edc5f 100644 --- a/crypto_kem/ntruhrss701/clean/Makefile +++ b/crypto_kem/ntruhrss701/clean/Makefile @@ -2,7 +2,7 @@ LIB=libntruhrss701_clean.a HEADERS=api.h owcpa.h params.h poly.h sample.h verify.h -OBJECTS=kem.o owcpa.o pack3.o packq.o poly.o sample.o verify.o +OBJECTS=kem.o owcpa.o pack3.o packq.o poly.o poly_lift.o poly_mod.o poly_r2_inv.o poly_rq_mul.o poly_s3_inv.o sample.o sample_iid.o verify.o CFLAGS=-O3 -Wall -Wextra -Wpedantic -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS) diff --git a/crypto_kem/ntruhrss701/clean/Makefile.Microsoft_nmake b/crypto_kem/ntruhrss701/clean/Makefile.Microsoft_nmake index ee1dbed0..fc1c8ad0 100644 --- a/crypto_kem/ntruhrss701/clean/Makefile.Microsoft_nmake +++ b/crypto_kem/ntruhrss701/clean/Makefile.Microsoft_nmake @@ -2,7 +2,7 @@ # nmake /f Makefile.Microsoft_nmake LIBRARY=libntruhrss701_clean.lib -OBJECTS=kem.obj owcpa.obj pack3.obj packq.obj poly.obj sample.obj verify.obj +OBJECTS=kem.obj owcpa.obj pack3.obj packq.obj poly.obj poly_lift.obj poly_mod.obj poly_r2_inv.obj poly_rq_mul.obj poly_s3_inv.obj sample.obj sample_iid.obj verify.obj CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX diff --git a/crypto_kem/ntruhrss701/clean/kem.c b/crypto_kem/ntruhrss701/clean/kem.c index 8adc31aa..435a13bc 100644 --- a/crypto_kem/ntruhrss701/clean/kem.c +++ b/crypto_kem/ntruhrss701/clean/kem.c @@ -5,6 +5,7 @@ #include "owcpa.h" #include "params.h" #include "randombytes.h" +#include "sample.h" #include "verify.h" // API FUNCTIONS @@ -20,15 +21,20 @@ int PQCLEAN_NTRUHRSS701_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { } int PQCLEAN_NTRUHRSS701_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) { + poly r, m; uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES]; randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES); - PQCLEAN_NTRUHRSS701_CLEAN_owcpa_samplemsg(rm, rm_seed); + PQCLEAN_NTRUHRSS701_CLEAN_sample_rm(&r, &m, rm_seed); + + PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_tobytes(rm, &r); + PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m); sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); - PQCLEAN_NTRUHRSS701_CLEAN_owcpa_enc(c, rm, pk); + PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(&r); + PQCLEAN_NTRUHRSS701_CLEAN_owcpa_enc(c, &r, &m, pk); return 0; } @@ -38,9 +44,14 @@ int PQCLEAN_NTRUHRSS701_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t rm[NTRU_OWCPA_MSGBYTES]; uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES]; - fail = PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec(rm, c, sk); - /* If fail = 0 then c = Enc(h, rm), there is no need to re-encapsulate. */ - /* See comment in PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec for details. */ + fail = 0; + + /* Check that unused bits of last byte of ciphertext are zero */ + fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)))); + + fail |= PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec(rm, c, sk); + /* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */ + /* See comment in PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec for details. */ sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); diff --git a/crypto_kem/ntruhrss701/clean/owcpa.c b/crypto_kem/ntruhrss701/clean/owcpa.c index 0e2227a4..721f0e0d 100644 --- a/crypto_kem/ntruhrss701/clean/owcpa.c +++ b/crypto_kem/ntruhrss701/clean/owcpa.c @@ -13,21 +13,12 @@ static int owcpa_check_r(const poly *r) { t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */ t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */ } - t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */ + t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */ t = (~t + 1); // two's complement t >>= 63; return (int) t; } -void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], - const unsigned char seed[NTRU_SAMPLE_RM_BYTES]) { - poly r, m; - - PQCLEAN_NTRUHRSS701_CLEAN_sample_rm(&r, &m, seed); - - PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_tobytes(msg, &r); - PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_tobytes(msg + NTRU_PACK_TRINARY_BYTES, &m); -} void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_keypair(unsigned char *pk, unsigned char *sk, @@ -36,9 +27,8 @@ void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_keypair(unsigned char *pk, poly x1, x2, x3, x4, x5; - poly *f = &x1, *invf_mod3 = &x2; - poly *g = &x3, *G = &x2; - poly *Gf = &x3, *invGf = &x4, *tmp = &x5; + poly *f = &x1, *g = &x2, *invf_mod3 = &x3; + poly *gf = &x3, *invgf = &x4, *tmp = &x5; poly *invh = &x3, *h = &x3; PQCLEAN_NTRUHRSS701_CLEAN_sample_fg(f, g, seed); @@ -51,46 +41,42 @@ void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_keypair(unsigned char *pk, PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(f); PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(g); - /* G = 3*(x-1)*g */ - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul_x_minus_1(G, g); - for (i = 0; i < NTRU_N; i++) { - G->coeffs[i] = MODQ(3 * G->coeffs[i]); + /* g = 3*(x-1)*g */ + for (i = NTRU_N - 1; i > 0; i--) { + g->coeffs[i] = 3 * (g->coeffs[i - 1] - g->coeffs[i]); } + g->coeffs[0] = -(3 * g->coeffs[0]); - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(Gf, G, f); + PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(gf, g, f); - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_inv(invGf, Gf); + PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_inv(invgf, gf); - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(tmp, invGf, f); + PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(tmp, invgf, f); PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_mul(invh, tmp, f); PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh); - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(tmp, invGf, G); - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(h, tmp, G); + PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(tmp, invgf, g); + PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(h, tmp, g); PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_sum_zero_tobytes(pk, h); } void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk) { int i; - poly x1, x2, x3; + poly x1, x2; poly *h = &x1, *liftm = &x1; - poly *r = &x2, *m = &x2; - poly *ct = &x3; + poly *ct = &x2; PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_sum_zero_frombytes(h, pk); - PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_frombytes(r, rm); - PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(r); - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(ct, r, h); - PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_frombytes(m, rm + NTRU_PACK_TRINARY_BYTES); PQCLEAN_NTRUHRSS701_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - ct->coeffs[i] = MODQ(ct->coeffs[i] + liftm->coeffs[i]); + ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i]; } PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_sum_zero_tobytes(c, ct); @@ -128,7 +114,7 @@ int PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec(unsigned char *rm, /* b = c - Lift(m) mod (q, x^n - 1) */ PQCLEAN_NTRUHRSS701_CLEAN_poly_lift(liftm, m); for (i = 0; i < NTRU_N; i++) { - b->coeffs[i] = MODQ(c->coeffs[i] - liftm->coeffs[i]); + b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i]; } /* r = b / h mod (q, Phi_n) */ diff --git a/crypto_kem/ntruhrss701/clean/owcpa.h b/crypto_kem/ntruhrss701/clean/owcpa.h index 1f2addc9..8179564e 100644 --- a/crypto_kem/ntruhrss701/clean/owcpa.h +++ b/crypto_kem/ntruhrss701/clean/owcpa.h @@ -2,6 +2,7 @@ #define OWCPA_H #include "params.h" +#include "poly.h" void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_samplemsg(unsigned char msg[NTRU_OWCPA_MSGBYTES], const unsigned char seed[NTRU_SEEDBYTES]); @@ -11,7 +12,8 @@ void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_keypair(unsigned char *pk, const unsigned char seed[NTRU_SEEDBYTES]); void PQCLEAN_NTRUHRSS701_CLEAN_owcpa_enc(unsigned char *c, - const unsigned char *rm, + const poly *r, + const poly *m, const unsigned char *pk); int PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec(unsigned char *rm, diff --git a/crypto_kem/ntruhrss701/clean/pack3.c b/crypto_kem/ntruhrss701/clean/pack3.c index f27a38f9..880454a7 100644 --- a/crypto_kem/ntruhrss701/clean/pack3.c +++ b/crypto_kem/ntruhrss701/clean/pack3.c @@ -21,12 +21,13 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_frombytes(poly *r, const unsigned char ms for (i = 0; i < NTRU_PACK_DEG / 5; i++) { c = msg[i]; - r->coeffs[5 * i + 0] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(c); - r->coeffs[5 * i + 1] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(c * 171 >> 9); // this is division by 3 - r->coeffs[5 * i + 2] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(c * 57 >> 9); // division by 3^2 - r->coeffs[5 * i + 3] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(c * 19 >> 9); // division by 3^3 - r->coeffs[5 * i + 4] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(c * 203 >> 14); // etc. + r->coeffs[5 * i + 0] = c; + r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3 + r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2 + r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3 + r->coeffs[5 * i + 4] = c * 203 >> 14; // etc. } r->coeffs[NTRU_N - 1] = 0; + PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_3_Phi_n(r); } diff --git a/crypto_kem/ntruhrss701/clean/packq.c b/crypto_kem/ntruhrss701/clean/packq.c index 238db8a6..e065b69b 100644 --- a/crypto_kem/ntruhrss701/clean/packq.c +++ b/crypto_kem/ntruhrss701/clean/packq.c @@ -7,7 +7,7 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a) for (i = 0; i < NTRU_PACK_DEG / 8; i++) { for (j = 0; j < 8; j++) { - t[j] = a->coeffs[8 * i + j]; + t[j] = MODQ(a->coeffs[8 * i + j]); } r[13 * i + 0] = (unsigned char) ( t[0] & 0xff); @@ -26,28 +26,30 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a) } for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) { - t[j] = a->coeffs[8 * i + j]; + t[j] = MODQ(a->coeffs[8 * i + j]); } for (; j < 8; j++) { t[j] = 0; } switch (NTRU_PACK_DEG - 8 * (NTRU_PACK_DEG / 8)) { - case 6: - r[13 * i + 9] = (unsigned char) ((t[5] >> 7) | ((t[6] & 0x03) << 6)); - r[13 * i + 8] = (unsigned char) ((t[4] >> 12) | ((t[5] & 0x7f) << 1)); - r[13 * i + 7] = (unsigned char) ((t[4] >> 4) & 0xff); - // fallthrough + // cases 0 and 6 are impossible since 2 generates (Z/n)* and + // p mod 8 in {1, 7} implies that 2 is a quadratic residue. case 4: - r[13 * i + 6] = (unsigned char) ((t[3] >> 9) | ((t[4] & 0x0f) << 4)); - r[13 * i + 5] = (unsigned char) ((t[3] >> 1) & 0xff); - r[13 * i + 4] = (unsigned char) ((t[2] >> 6) | ((t[3] & 0x01) << 7)); - // fallthrough - case 2: - r[13 * i + 3] = (unsigned char) ((t[1] >> 11) | ((t[2] & 0x3f) << 2)); - r[13 * i + 2] = (unsigned char) ((t[1] >> 3) & 0xff); - r[13 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x07) << 5)); r[13 * i + 0] = (unsigned char) ( t[0] & 0xff); + r[13 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x07) << 5); + r[13 * i + 2] = (unsigned char) (t[1] >> 3) & 0xff; + r[13 * i + 3] = (unsigned char) (t[1] >> 11) | ((t[2] & 0x3f) << 2); + r[13 * i + 4] = (unsigned char) (t[2] >> 6) | ((t[3] & 0x01) << 7); + r[13 * i + 5] = (unsigned char) (t[3] >> 1) & 0xff; + r[13 * i + 6] = (unsigned char) (t[3] >> 9) | ((t[4] & 0x0f) << 4); + break; + case 2: + r[13 * i + 0] = (unsigned char) ( t[0] & 0xff); + r[13 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x07) << 5); + r[13 * i + 2] = (unsigned char) (t[1] >> 3) & 0xff; + r[13 * i + 3] = (unsigned char) (t[1] >> 11) | ((t[2] & 0x3f) << 2); + break; } } @@ -63,19 +65,21 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a r->coeffs[8 * i + 6] = (a[13 * i + 9] >> 6) | (((uint16_t)a[13 * i + 10] ) << 2) | (((uint16_t)a[13 * i + 11] & 0x07) << 10); r->coeffs[8 * i + 7] = (a[13 * i + 11] >> 3) | (((uint16_t)a[13 * i + 12] ) << 5); } - switch (NTRU_PACK_DEG - 8 * (NTRU_PACK_DEG / 8)) { - case 6: - r->coeffs[8 * i + 5] = (a[13 * i + 8] >> 1) | (((uint16_t)a[13 * i + 9] & 0x3f) << 7); - r->coeffs[8 * i + 4] = (a[13 * i + 6] >> 4) | (((uint16_t)a[13 * i + 7] ) << 4) | (((uint16_t)a[13 * i + 8] & 0x01) << 12); - // fallthrough + switch (NTRU_PACK_DEG & 0x07) { + // cases 0 and 6 are impossible since 2 generates (Z/n)* and + // p mod 8 in {1, 7} implies that 2 is a quadratic residue. case 4: - r->coeffs[8 * i + 3] = (a[13 * i + 4] >> 7) | (((uint16_t)a[13 * i + 5] ) << 1) | (((uint16_t)a[13 * i + 6] & 0x0f) << 9); - r->coeffs[8 * i + 2] = (a[13 * i + 3] >> 2) | (((uint16_t)a[13 * i + 4] & 0x7f) << 6); - // fallthrough - case 2: - r->coeffs[8 * i + 1] = (a[13 * i + 1] >> 5) | (((uint16_t)a[13 * i + 2] ) << 3) | (((uint16_t)a[13 * i + 3] & 0x03) << 11); r->coeffs[8 * i + 0] = a[13 * i + 0] | (((uint16_t)a[13 * i + 1] & 0x1f) << 8); + r->coeffs[8 * i + 1] = (a[13 * i + 1] >> 5) | (((uint16_t)a[13 * i + 2] ) << 3) | (((uint16_t)a[13 * i + 3] & 0x03) << 11); + r->coeffs[8 * i + 2] = (a[13 * i + 3] >> 2) | (((uint16_t)a[13 * i + 4] & 0x7f) << 6); + r->coeffs[8 * i + 3] = (a[13 * i + 4] >> 7) | (((uint16_t)a[13 * i + 5] ) << 1) | (((uint16_t)a[13 * i + 6] & 0x0f) << 9); + break; + case 2: + r->coeffs[8 * i + 0] = a[13 * i + 0] | (((uint16_t)a[13 * i + 1] & 0x1f) << 8); + r->coeffs[8 * i + 1] = (a[13 * i + 1] >> 5) | (((uint16_t)a[13 * i + 2] ) << 3) | (((uint16_t)a[13 * i + 3] & 0x03) << 11); + break; } + r->coeffs[NTRU_N - 1] = 0; } void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) { @@ -89,7 +93,6 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsigne /* Set r[n-1] so that the sum of coefficients is zero mod q */ r->coeffs[NTRU_N - 1] = 0; for (i = 0; i < NTRU_PACK_DEG; i++) { - r->coeffs[NTRU_N - 1] += r->coeffs[i]; + r->coeffs[NTRU_N - 1] -= r->coeffs[i]; } - r->coeffs[NTRU_N - 1] = MODQ(-(r->coeffs[NTRU_N - 1])); } diff --git a/crypto_kem/ntruhrss701/clean/params.h b/crypto_kem/ntruhrss701/clean/params.h index 7948514a..9e0ea714 100644 --- a/crypto_kem/ntruhrss701/clean/params.h +++ b/crypto_kem/ntruhrss701/clean/params.h @@ -7,6 +7,8 @@ /* Do not modify below this line */ +#define PAD32(X) ((((X) + 31)/32)*32) + #define NTRU_Q (1 << NTRU_LOGQ) #define NTRU_SEEDBYTES 32 diff --git a/crypto_kem/ntruhrss701/clean/poly.c b/crypto_kem/ntruhrss701/clean/poly.c index 3d1738fc..8ab941d5 100644 --- a/crypto_kem/ntruhrss701/clean/poly.c +++ b/crypto_kem/ntruhrss701/clean/poly.c @@ -1,21 +1,4 @@ #include "poly.h" -#include "fips202.h" -#include "verify.h" - -uint16_t PQCLEAN_NTRUHRSS701_CLEAN_mod3(uint16_t a) { - uint16_t r; - int16_t t, c; - - r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 - r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 - - t = r - 3; - c = t >> 15; - - return (c & r) ^ (~c & t); -} /* Map {0, 1, 2} -> {0,1,q-1} in place */ void PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(poly *r) { @@ -29,239 +12,22 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(poly *r) { void PQCLEAN_NTRUHRSS701_CLEAN_poly_trinary_Zq_to_Z3(poly *r) { int i; for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = MODQ(r->coeffs[i]); r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1))); } } -void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - r->coeffs[k] = MODQ(r->coeffs[k]); - } -} - void PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b) { - int i; PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(r, a, b); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = MODQ(r->coeffs[i] - r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_q_Phi_n(r); } void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b) { - int k, i; - - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = 0; - for (i = 1; i < NTRU_N - k; i++) { - r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; - } - for (i = 0; i < k + 1; i++) { - r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; - } - } - for (k = 0; k < NTRU_N; k++) { - r->coeffs[k] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(r->coeffs[k] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(r, a, b); + PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_3_Phi_n(r); } -void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a) { - int i; - uint16_t last_coeff = a->coeffs[NTRU_N - 1]; - - for (i = NTRU_N - 1; i > 0; i--) { - r->coeffs[i] = MODQ(a->coeffs[i - 1] + (NTRU_Q - a->coeffs[i])); - } - r->coeffs[0] = MODQ(last_coeff + (NTRU_Q - a->coeffs[0])); -} - -void PQCLEAN_NTRUHRSS701_CLEAN_poly_lift(poly *r, const poly *a) { - /* NOTE: Assumes input is in {0,1,2}^N */ - /* Produces output in [0,Q-1]^N */ - int i; - poly b; - uint16_t t, zj; - - /* Define z by = delta_{i,0} mod 3: */ - /* t = -1/N mod p = -N mod 3 */ - /* z[0] = 2 - t mod 3 */ - /* z[1] = 0 mod 3 */ - /* z[j] = z[j-1] + t mod 3 */ - /* We'll compute b = a/(x-1) mod (3, Phi) using */ - /* b[0] = , b[1] = , b[2] = */ - /* b[i] = b[i-3] - (a[i] + a[i-1] + a[i-2]) */ - t = 3 - (NTRU_N % 3); - b.coeffs[0] = a->coeffs[0] * (2 - t) + a->coeffs[1] * 0 + a->coeffs[2] * t; - b.coeffs[1] = a->coeffs[1] * (2 - t) + a->coeffs[2] * 0; - b.coeffs[2] = a->coeffs[2] * (2 - t); - - zj = 0; /* z[1] */ - for (i = 3; i < NTRU_N; i++) { - b.coeffs[0] += a->coeffs[i] * (zj + 2 * t); - b.coeffs[1] += a->coeffs[i] * (zj + t); - b.coeffs[2] += a->coeffs[i] * zj; - zj = (zj + t) % 3; - } - b.coeffs[1] += a->coeffs[0] * (zj + t); - b.coeffs[2] += a->coeffs[0] * zj; - b.coeffs[2] += a->coeffs[1] * (zj + t); - - b.coeffs[0] = b.coeffs[0]; - b.coeffs[1] = b.coeffs[1]; - b.coeffs[2] = b.coeffs[2]; - for (i = 3; i < NTRU_N; i++) { - b.coeffs[i] = b.coeffs[i - 3] + 2 * (a->coeffs[i] + a->coeffs[i - 1] + a->coeffs[i - 2]); - } - - /* Finish reduction mod Phi by subtracting Phi * b[N-1] */ - for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(b.coeffs[i] + 2 * b.coeffs[NTRU_N - 1]); - } - - /* Switch from {0,1,2} to {0,1,q-1} coefficient representation */ - PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(&b); - - /* Multiply by (x-1) */ - PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul_x_minus_1(r, &b); -} - -void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { - /* NOTE: Assumes input is in [0,Q-1]^N */ - /* Produces output in {0,1,2}^N */ - int i; - - /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = ((a->coeffs[i] >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; - r->coeffs[i] += a->coeffs[i]; - } - /* Reduce mod (3, Phi) */ - r->coeffs[NTRU_N - 1] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(r->coeffs[NTRU_N - 1]); - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } -} - -#define POLY_R2_ADD(I,A,B,S) \ - for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; - a->coeffs[i] ^= t; - b->coeffs[i] ^= t; - } -} - -static inline void poly_divx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); - } - a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); -} - -static inline void poly_mulx(poly *a, int s) { - int i; - - for (i = 1; i < NTRU_N; i++) { - a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); - } - a->coeffs[0] = (!s * a->coeffs[0]); -} - -static void poly_R2_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - int k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, t, swap; - int16_t done = 0; - poly b, f, g; - poly *c = r; // save some stack space - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c->coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i] & 1; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = f.coeffs[0]; - swap = sign & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - POLY_R2_ADD(i, f, g, sign * (!done)); - POLY_R2_ADD(i, b, (*c), sign * (!done)); - - poly_divx(&f, !done); - poly_mulx(c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = b.coeffs[i]; - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHRSS701_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } -} - -static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { +static void PQCLEAN_NTRUHRSS701_CLEAN_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { int i; poly b, c; @@ -270,7 +36,7 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { // for 0..4 // ai = ai * (2 - a*ai) mod q for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = MODQ(NTRU_Q - a->coeffs[i]); // b = -a + b.coeffs[i] = -(a->coeffs[i]); } for (i = 0; i < NTRU_N; i++) { @@ -296,92 +62,6 @@ static void poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_inv(poly *r, const poly *a) { poly ai2; - poly_R2_inv(&ai2, a); - poly_R2_inv_to_Rq_inv(r, &ai2, a); -} - -void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_inv(poly *r, const poly *a) { - /* Schroeppel--Orman--O'Malley--Spatscheck - * "Almost Inverse" algorithm as described - * by Silverman in NTRU Tech Report #14 */ - // with several modifications to make it run in constant-time - int i, j; - uint16_t k = 0; - uint16_t degf = NTRU_N - 1; - uint16_t degg = NTRU_N - 1; - int sign, fsign = 0, t, swap; - int16_t done = 0; - poly b, c, f, g; - poly *temp_r = &f; - - /* b(X) := 1 */ - for (i = 1; i < NTRU_N; i++) { - b.coeffs[i] = 0; - } - b.coeffs[0] = 1; - - /* c(X) := 0 */ - for (i = 0; i < NTRU_N; i++) { - c.coeffs[i] = 0; - } - - /* f(X) := a(X) */ - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = a->coeffs[i]; - } - - /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ - for (i = 0; i < NTRU_N; i++) { - g.coeffs[i] = 1; - } - - for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { - sign = PQCLEAN_NTRUHRSS701_CLEAN_mod3(2 * g.coeffs[0] * f.coeffs[0]); - swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); - - cswappoly(&f, &g, swap); - cswappoly(&b, &c, swap); - t = (degf ^ degg) & (-swap); - degf ^= t; - degg ^= t; - - for (i = 0; i < NTRU_N; i++) { - f.coeffs[i] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); - } - for (i = 0; i < NTRU_N; i++) { - b.coeffs[i] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); - } - - poly_divx(&f, !done); - poly_mulx(&c, !done); - degf -= !done; - k += !done; - - done = 1 - (((uint16_t) - degf) >> 15); - } - - fsign = f.coeffs[0]; - k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); - - /* Return X^{N-k} * b(X) */ - /* This is a k-coefficient rotation. We do this by looking at the binary - representation of k, rotating for every power of 2, and performing a cmov - if the respective bit is set. */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHRSS701_CLEAN_mod3((uint16_t) fsign * b.coeffs[i]); - } - - for (i = 0; i < 10; i++) { - for (j = 0; j < NTRU_N; j++) { - temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; - } - PQCLEAN_NTRUHRSS701_CLEAN_cmov((unsigned char *) & (r->coeffs), - (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); - k >>= 1; - } - - /* Reduce modulo Phi_n */ - for (i = 0; i < NTRU_N; i++) { - r->coeffs[i] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); - } + PQCLEAN_NTRUHRSS701_CLEAN_poly_R2_inv(&ai2, a); + PQCLEAN_NTRUHRSS701_CLEAN_poly_R2_inv_to_Rq_inv(r, &ai2, a); } diff --git a/crypto_kem/ntruhrss701/clean/poly.h b/crypto_kem/ntruhrss701/clean/poly.h index 4935987b..c7d0658b 100644 --- a/crypto_kem/ntruhrss701/clean/poly.h +++ b/crypto_kem/ntruhrss701/clean/poly.h @@ -6,12 +6,16 @@ #include "params.h" #define MODQ(X) ((X) & (NTRU_Q-1)) -uint16_t PQCLEAN_NTRUHRSS701_CLEAN_mod3(uint16_t a); typedef struct { + // round to nearest multiple of 32 to make it easier to load into vector + // registers without having to do bound checks +#define NTRU_N_32 PAD32(NTRU_N) uint16_t coeffs[NTRU_N]; } poly; +void PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_3_Phi_n(poly *r); +void PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_q_Phi_n(poly *r); void PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a); void PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a); @@ -24,11 +28,11 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_frombytes(poly *r, const unsigned char ms void PQCLEAN_NTRUHRSS701_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b); -void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul_x_minus_1(poly *r, const poly *a); void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b); void PQCLEAN_NTRUHRSS701_CLEAN_poly_lift(poly *r, const poly *a); void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_to_S3(poly *r, const poly *a); +void PQCLEAN_NTRUHRSS701_CLEAN_poly_R2_inv(poly *r, const poly *a); void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_inv(poly *r, const poly *a); void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_inv(poly *r, const poly *a); diff --git a/crypto_kem/ntruhrss701/clean/poly_lift.c b/crypto_kem/ntruhrss701/clean/poly_lift.c new file mode 100644 index 00000000..6ff5f2c1 --- /dev/null +++ b/crypto_kem/ntruhrss701/clean/poly_lift.c @@ -0,0 +1,69 @@ +#include "poly.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHRSS701_CLEAN_poly_lift(poly *r, const poly *a) { + /* NOTE: Assumes input is in {0,1,2}^N */ + /* Produces output in [0,Q-1]^N */ + int i; + poly b; + uint16_t t, zj; + + /* Define z by = delta_{i,0} mod 3: */ + /* t = -1/N mod p = -N mod 3 */ + /* z[0] = 2 - t mod 3 */ + /* z[1] = 0 mod 3 */ + /* z[j] = z[j-1] + t mod 3 */ + /* We'll compute b = a/(x-1) mod (3, Phi) using */ + /* b[0] = , b[1] = , b[2] = */ + /* b[i] = b[i-3] - (a[i] + a[i-1] + a[i-2]) */ + t = 3 - (NTRU_N % 3); + b.coeffs[0] = a->coeffs[0] * (2 - t) + a->coeffs[1] * 0 + a->coeffs[2] * t; + b.coeffs[1] = a->coeffs[1] * (2 - t) + a->coeffs[2] * 0; + b.coeffs[2] = a->coeffs[2] * (2 - t); + + zj = 0; /* z[1] */ + for (i = 3; i < NTRU_N; i++) { + b.coeffs[0] += a->coeffs[i] * (zj + 2 * t); + b.coeffs[1] += a->coeffs[i] * (zj + t); + b.coeffs[2] += a->coeffs[i] * zj; + zj = (zj + t) % 3; + } + b.coeffs[1] += a->coeffs[0] * (zj + t); + b.coeffs[2] += a->coeffs[0] * zj; + b.coeffs[2] += a->coeffs[1] * (zj + t); + + b.coeffs[0] = b.coeffs[0]; + b.coeffs[1] = b.coeffs[1]; + b.coeffs[2] = b.coeffs[2]; + for (i = 3; i < NTRU_N; i++) { + b.coeffs[i] = b.coeffs[i - 3] + 2 * (a->coeffs[i] + a->coeffs[i - 1] + a->coeffs[i - 2]); + } + + /* Finish reduction mod Phi by subtracting Phi * b[N-1] */ + for (i = 0; i < NTRU_N; i++) { + b.coeffs[i] = mod3(b.coeffs[i] + 2 * b.coeffs[NTRU_N - 1]); + } + + /* Switch from {0,1,2} to {0,1,q-1} coefficient representation */ + PQCLEAN_NTRUHRSS701_CLEAN_poly_Z3_to_Zq(&b); + + /* Multiply by (x-1) */ + r->coeffs[0] = -(b.coeffs[0]); + for (i = 0; i < NTRU_N - 1; i++) { + r->coeffs[i + 1] = b.coeffs[i] - b.coeffs[i + 1]; + } +} diff --git a/crypto_kem/ntruhrss701/clean/poly_mod.c b/crypto_kem/ntruhrss701/clean/poly_mod.c new file mode 100644 index 00000000..01d174b1 --- /dev/null +++ b/crypto_kem/ntruhrss701/clean/poly_mod.c @@ -0,0 +1,45 @@ +#include "poly.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_3_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} + +void PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_q_Phi_n(poly *r) { + int i; + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = r->coeffs[i] - r->coeffs[NTRU_N - 1]; + } +} + +void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { + /* NOTE: Assumes input is in [0,Q-1]^N */ + /* Produces output in {0,1,2}^N */ + int i; + + /* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ; + r->coeffs[i] += MODQ(a->coeffs[i]); + } + + PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_3_Phi_n(r); +} + diff --git a/crypto_kem/ntruhrss701/clean/poly_r2_inv.c b/crypto_kem/ntruhrss701/clean/poly_r2_inv.c new file mode 100644 index 00000000..49d1acde --- /dev/null +++ b/crypto_kem/ntruhrss701/clean/poly_r2_inv.c @@ -0,0 +1,113 @@ +#include "poly.h" +#include "verify.h" + +#define POLY_R2_ADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHRSS701_CLEAN_poly_R2_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + int k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, t, swap; + int16_t done = 0; + poly b, f, g; + poly *c = r; // save some stack space + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c->coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i] & 1; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = f.coeffs[0]; + swap = sign & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + POLY_R2_ADD(i, f, g, sign * (!done)); + POLY_R2_ADD(i, b, (*c), sign * (!done)); + + poly_divx(&f, !done); + poly_mulx(c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = b.coeffs[i]; + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHRSS701_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } +} diff --git a/crypto_kem/ntruhrss701/clean/poly_rq_mul.c b/crypto_kem/ntruhrss701/clean/poly_rq_mul.c new file mode 100644 index 00000000..629561fc --- /dev/null +++ b/crypto_kem/ntruhrss701/clean/poly_rq_mul.c @@ -0,0 +1,15 @@ +#include "poly.h" + +void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { + int k, i; + + for (k = 0; k < NTRU_N; k++) { + r->coeffs[k] = 0; + for (i = 1; i < NTRU_N - k; i++) { + r->coeffs[k] += a->coeffs[k + i] * b->coeffs[NTRU_N - i]; + } + for (i = 0; i < k + 1; i++) { + r->coeffs[k] += a->coeffs[k - i] * b->coeffs[i]; + } + } +} diff --git a/crypto_kem/ntruhrss701/clean/poly_s3_inv.c b/crypto_kem/ntruhrss701/clean/poly_s3_inv.c new file mode 100644 index 00000000..2924455a --- /dev/null +++ b/crypto_kem/ntruhrss701/clean/poly_s3_inv.c @@ -0,0 +1,137 @@ +#include "poly.h" +#include "verify.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +#define POLY_S3_FMADD(I,A,B,S) \ + for ((I)=0; (I)coeffs[i] ^ b->coeffs[i]) & swap; + a->coeffs[i] ^= t; + b->coeffs[i] ^= t; + } +} + +static inline void poly_divx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[i - 1] = (unsigned char) ((s * a->coeffs[i]) | (!s * a->coeffs[i - 1])); + } + a->coeffs[NTRU_N - 1] = (!s * a->coeffs[NTRU_N - 1]); +} + +static inline void poly_mulx(poly *a, int s) { + int i; + + for (i = 1; i < NTRU_N; i++) { + a->coeffs[NTRU_N - i] = (unsigned char) ((s * a->coeffs[NTRU_N - i - 1]) | (!s * a->coeffs[NTRU_N - i])); + } + a->coeffs[0] = (!s * a->coeffs[0]); +} + +void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_inv(poly *r, const poly *a) { + /* Schroeppel--Orman--O'Malley--Spatscheck + * "Almost Inverse" algorithm as described + * by Silverman in NTRU Tech Report #14 */ + // with several modifications to make it run in constant-time + int i, j; + uint16_t k = 0; + uint16_t degf = NTRU_N - 1; + uint16_t degg = NTRU_N - 1; + int sign, fsign = 0, t, swap; + int16_t done = 0; + poly b, c, f, g; + poly *temp_r = &f; + + /* b(X) := 1 */ + for (i = 1; i < NTRU_N; i++) { + b.coeffs[i] = 0; + } + b.coeffs[0] = 1; + + /* c(X) := 0 */ + for (i = 0; i < NTRU_N; i++) { + c.coeffs[i] = 0; + } + + /* f(X) := a(X) */ + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = a->coeffs[i]; + } + + /* g(X) := 1 + X + X^2 + ... + X^{N-1} */ + for (i = 0; i < NTRU_N; i++) { + g.coeffs[i] = 1; + } + + for (j = 0; j < 2 * (NTRU_N - 1) - 1; j++) { + sign = mod3(2 * g.coeffs[0] * f.coeffs[0]); + swap = (((sign & 2) >> 1) | sign) & !done & ((degf - degg) >> 15); + + cswappoly(&f, &g, swap); + cswappoly(&b, &c, swap); + t = (degf ^ degg) & (-swap); + degf ^= t; + degg ^= t; + + for (i = 0; i < NTRU_N; i++) { + f.coeffs[i] = mod3(f.coeffs[i] + ((uint16_t) (sign * (!done))) * g.coeffs[i]); + } + for (i = 0; i < NTRU_N; i++) { + b.coeffs[i] = mod3(b.coeffs[i] + ((uint16_t) (sign * (!done))) * c.coeffs[i]); + } + + poly_divx(&f, !done); + poly_mulx(&c, !done); + degf -= !done; + k += !done; + + done = 1 - (((uint16_t) - degf) >> 15); + } + + fsign = f.coeffs[0]; + k = k - NTRU_N * ((uint16_t)(NTRU_N - k - 1) >> 15); + + /* Return X^{N-k} * b(X) */ + /* This is a k-coefficient rotation. We do this by looking at the binary + representation of k, rotating for every power of 2, and performing a cmov + if the respective bit is set. */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3((uint16_t) fsign * b.coeffs[i]); + } + + for (i = 0; i < 10; i++) { + for (j = 0; j < NTRU_N; j++) { + temp_r->coeffs[j] = r->coeffs[(j + (1 << i)) % NTRU_N]; + } + PQCLEAN_NTRUHRSS701_CLEAN_cmov((unsigned char *) & (r->coeffs), + (unsigned char *) & (temp_r->coeffs), sizeof(uint16_t) * NTRU_N, k & 1); + k >>= 1; + } + + /* Reduce modulo Phi_n */ + for (i = 0; i < NTRU_N; i++) { + r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); + } +} diff --git a/crypto_kem/ntruhrss701/clean/sample.c b/crypto_kem/ntruhrss701/clean/sample.c index 9c1c7d3b..51cd725e 100644 --- a/crypto_kem/ntruhrss701/clean/sample.c +++ b/crypto_kem/ntruhrss701/clean/sample.c @@ -1,5 +1,4 @@ #include "sample.h" -#include "fips202.h" void PQCLEAN_NTRUHRSS701_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) { PQCLEAN_NTRUHRSS701_CLEAN_sample_iid_plus(f, uniformbytes); @@ -11,15 +10,6 @@ void PQCLEAN_NTRUHRSS701_CLEAN_sample_rm(poly *r, poly *m, const unsigned char u PQCLEAN_NTRUHRSS701_CLEAN_sample_iid(m, uniformbytes + NTRU_SAMPLE_IID_BYTES); } -void PQCLEAN_NTRUHRSS701_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { - int i; - /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ - for (i = 0; i < NTRU_N - 1; i++) { - r->coeffs[i] = PQCLEAN_NTRUHRSS701_CLEAN_mod3(uniformbytes[i]); - } - - r->coeffs[NTRU_N - 1] = 0; -} void PQCLEAN_NTRUHRSS701_CLEAN_sample_iid_plus(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { /* Sample r using sample_iid then conditionally flip */ diff --git a/crypto_kem/ntruhrss701/clean/sample_iid.c b/crypto_kem/ntruhrss701/clean/sample_iid.c new file mode 100644 index 00000000..82ff4b98 --- /dev/null +++ b/crypto_kem/ntruhrss701/clean/sample_iid.c @@ -0,0 +1,26 @@ +#include "sample.h" + +static uint16_t mod3(uint16_t a) { + uint16_t r; + int16_t t, c; + + r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 + r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 + + t = r - 3; + c = t >> 15; + + return (c & r) ^ (~c & t); +} + +void PQCLEAN_NTRUHRSS701_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { + int i; + /* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ + for (i = 0; i < NTRU_N - 1; i++) { + r->coeffs[i] = mod3(uniformbytes[i]); + } + + r->coeffs[NTRU_N - 1] = 0; +}