@@ -167,7 +167,7 @@ FetchContent_Declare( | |||
gbench | |||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/gbench | |||
GIT_REPOSITORY https://github.com/kriskwiatkowski/benchmark.git | |||
GIT_TAG 49862ab56b6b7c3afd87b80bd5d787ed78ce3b96 | |||
GIT_TAG hdc/release_crypto | |||
) | |||
FetchContent_Populate(gbench) | |||
@@ -236,19 +236,6 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-robust/clean) | |||
add_subdirectory(src/kem/kyber/kyber512/clean) | |||
add_subdirectory(src/kem/kyber/kyber768/clean) | |||
add_subdirectory(src/kem/kyber/kyber1024/clean) | |||
add_subdirectory(src/kem/saber/lightsaber/clean) | |||
add_subdirectory(src/kem/saber/firesaber/clean) | |||
add_subdirectory(src/kem/saber/saber/clean) | |||
add_subdirectory(src/kem/frodo/frodokem640shake/clean) | |||
add_subdirectory(src/kem/frodo/frodokem976shake/clean) | |||
add_subdirectory(src/kem/frodo/frodokem1344shake/clean) | |||
add_subdirectory(src/kem/ntru/ntruhps4096821/clean) | |||
add_subdirectory(src/kem/ntru/ntruhps2048509/clean) | |||
add_subdirectory(src/kem/ntru/ntruhrss701/clean) | |||
add_subdirectory(src/kem/ntru/ntruhps2048677/clean) | |||
add_subdirectory(src/kem/ntru_prime/ntrulpr761/clean) | |||
add_subdirectory(src/kem/ntru_prime/ntrulpr653/clean) | |||
add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean) | |||
@@ -300,16 +287,6 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/avx2) | |||
add_subdirectory(src/kem/kyber/kyber512/avx2) | |||
add_subdirectory(src/kem/kyber/kyber768/avx2) | |||
add_subdirectory(src/kem/kyber/kyber1024/avx2) | |||
add_subdirectory(src/kem/saber/lightsaber/avx2) | |||
add_subdirectory(src/kem/saber/firesaber/avx2) | |||
add_subdirectory(src/kem/saber/saber/avx2) | |||
add_subdirectory(src/kem/ntru/ntruhps4096821/avx2) | |||
add_subdirectory(src/kem/ntru/ntruhps2048509/avx2) | |||
add_subdirectory(src/kem/ntru/ntruhrss701/avx2) | |||
add_subdirectory(src/kem/ntru/ntruhps2048677/avx2) | |||
add_subdirectory(src/kem/ntru_prime/ntrulpr761/avx2) | |||
add_subdirectory(src/kem/ntru_prime/ntrulpr653/avx2) | |||
add_subdirectory(src/kem/ntru_prime/ntrulpr857/avx2) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-128/avx2) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2) | |||
add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2) | |||
@@ -8,20 +8,21 @@ Users shouldn't expect any level of security provided by this code. The library
## Supported schemes
| Name | NIST Round | x86 optimized |
|--------------------------|------------|---------------|
| Kyber | 3 | x |
| SABER | 3 | x |
| FrodoKEM | 3 | |
| Dilithium | 3 | x |
| Falcon | 3 | |
| SPHINCS+ SHA256/SHAKE256 | 3 | x |
| NTRU | 3 | x |
| NTRU Prime | 3 | x |
| HQC-RMRS | 3 | x |
| Rainbow | 3 | |
| SIKE/p434 | 3 | x |
| McEliece | 3 | |
| Name                     | x86 optimized |
|--------------------------|---------------|
| Kyber                    | x             |
| Dilithium                | x             |
| Falcon                   |               |
| SPHINCS+ SHA256/SHAKE256 | x             |
## Round 4 algorithms
| Name                     | x86 optimized |
|--------------------------|---------------|
| HQC-RMRS                 | x             |
| SIKE/p434                | x             |
| McEliece                 |               |
## Building
@@ -44,22 +44,9 @@ extern "C" { | |||
// Defines supported kem algorithm list. The resulting | |||
// ID of an algorithm is PQC_ALG_KEM_(NAME_AS_BELOW) | |||
#define PQC_SUPPORTED_KEMS(_)\ | |||
_(FRODOKEM640SHAKE) \ | |||
_(FRODOKEM976SHAKE) \ | |||
_(FRODOKEM1344SHAKE) \ | |||
_(KYBER512) \ | |||
_(KYBER768) \ | |||
_(KYBER1024) \ | |||
_(NTRUHPS2048509) \ | |||
_(NTRUHPS4096821) \ | |||
_(NTRUHRSS701) \ | |||
_(NTRUHPS2048677) \ | |||
_(NTRULPR761) \ | |||
_(NTRULPR653) \ | |||
_(NTRULPR857) \ | |||
_(LIGHTSABER) \ | |||
_(SABER) \ | |||
_(FIRESABER) \ | |||
_(HQCRMRS128) \ | |||
_(HQCRMRS192) \ | |||
_(HQCRMRS256) \ | |||
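For context, `PQC_SUPPORTED_KEMS` is an X-macro: each `_(NAME)` entry is expanded through a caller-supplied macro, which is how the `PQC_ALG_KEM_*` identifiers are generated. A minimal standalone sketch of the pattern (the `DEMO_*` names are hypothetical, not the library's actual header):

```c
/* Illustrative X-macro expansion: the list below plays the role of
 * PQC_SUPPORTED_KEMS, and DEMO_ENUM_ENTRY turns each name into an enum ID. */
#define DEMO_SUPPORTED_KEMS(_) \
    _(KYBER512)                \
    _(KYBER768)                \
    _(KYBER1024)

enum {
#define DEMO_ENUM_ENTRY(name) DEMO_ALG_KEM_##name,
    DEMO_SUPPORTED_KEMS(DEMO_ENUM_ENTRY)
#undef DEMO_ENUM_ENTRY
    DEMO_ALG_KEM_MAX
};
```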
@@ -57,20 +57,6 @@ | |||
#include "sign/dilithium/dilithium5/clean/api.h" | |||
#include "sign/dilithium/dilithium5/avx2/api.h" | |||
#include "sign/falcon/api.h" | |||
#include "kem/ntru/ntruhps4096821/clean/api.h" | |||
#include "kem/ntru/ntruhps4096821/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048509/clean/api.h" | |||
#include "kem/ntru/ntruhps2048509/avx2/api.h" | |||
#include "kem/ntru/ntruhrss701/clean/api.h" | |||
#include "kem/ntru/ntruhrss701/avx2/api.h" | |||
#include "kem/ntru/ntruhps2048677/clean/api.h" | |||
#include "kem/ntru/ntruhps2048677/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr761/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr653/avx2/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/clean/api.h" | |||
#include "kem/ntru_prime/ntrulpr857/avx2/api.h" | |||
#include "kem/kyber/kyber768/clean/api.h" | |||
#include "kem/kyber/kyber768/avx2/api.h" | |||
#include "kem/kyber/kyber1024/clean/api.h" | |||
@@ -97,15 +83,6 @@ | |||
#include "kem/mceliece/mceliece6960119/clean/api.h" | |||
#include "kem/mceliece/mceliece348864/avx/api.h" | |||
#include "kem/mceliece/mceliece348864/clean/api.h" | |||
#include "kem/frodo/frodokem976shake/clean/api.h" | |||
#include "kem/frodo/frodokem1344shake/clean/api.h" | |||
#include "kem/frodo/frodokem640shake/clean/api.h" | |||
#include "kem/saber/lightsaber/clean/api.h" | |||
#include "kem/saber/lightsaber/avx2/api.h" | |||
#include "kem/saber/firesaber/clean/api.h" | |||
#include "kem/saber/firesaber/avx2/api.h" | |||
#include "kem/saber/saber/clean/api.h" | |||
#include "kem/saber/saber/avx2/api.h" | |||
#include "kem/hqc/hqc-rmrs-128/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-192/clean/api.h" | |||
#include "kem/hqc/hqc-rmrs-256/clean/api.h" | |||
@@ -1,10 +0,0 @@ | |||
set( | |||
SRC_CLEAN_FRODOKEM1344SHAKE | |||
kem.c | |||
matrix_shake.c | |||
noise.c | |||
util.c | |||
) | |||
define_kem_alg(frodo1344shake_clean | |||
PQCLEAN_FRODOKEM1344SHAKE_OPT "${SRC_CLEAN_FRODOKEM1344SHAKE}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -1,20 +0,0 @@ | |||
#ifndef PQCLEAN_FRODOKEM1344SHAKE_CLEAN_API_H | |||
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_API_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES 43088 // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH | |||
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES 21520 // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 | |||
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_BYTES 32 | |||
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES 21632 // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8 | |||
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_ALGNAME "FrodoKEM-1344-SHAKE" | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); | |||
#endif |
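The removed header declared the usual three-call KEM interface together with its buffer sizes. A minimal usage sketch, assuming the (now removed) clean implementation and a `randombytes` provider are still linked in:

```c
/* Hedged sketch: drive keypair -> enc -> dec and check the shared secrets.
 * Buffer sizes come from the CRYPTO_* constants declared in the header above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "kem/frodo/frodokem1344shake/clean/api.h"

int main(void) {
    uint8_t *pk = malloc(PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES);
    uint8_t *sk = malloc(PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES);
    uint8_t *ct = malloc(PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES);
    uint8_t ss_enc[PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_BYTES];
    uint8_t ss_dec[PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_BYTES];
    if (!pk || !sk || !ct) {
        return 1;
    }
    PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_keypair(pk, sk);
    PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_enc(ct, ss_enc, pk);
    PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_dec(ss_dec, ct, sk);
    printf("shared secrets %s\n",
           memcmp(ss_enc, ss_dec, sizeof ss_enc) == 0 ? "match" : "differ");
    free(pk); free(sk); free(ct);
    return 0;
}
```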
@@ -1,21 +0,0 @@ | |||
#ifndef COMMON_H | |||
#define COMMON_H | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(uint16_t *s, size_t n); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb); | |||
int8_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector); | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n); | |||
uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(uint16_t n); | |||
uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(uint16_t n); | |||
#endif |
@@ -1,237 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: Key Encapsulation Mechanism (KEM) based on Frodo | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "randombytes.h" | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
// FrodoKEM's key generation | |||
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes) | |||
// secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes) | |||
uint8_t *pk_seedA = &pk[0]; | |||
uint8_t *pk_b = &pk[BYTES_SEED_A]; | |||
uint8_t *sk_s = &sk[0]; | |||
uint8_t *sk_pk = &sk[CRYPTO_BYTES]; | |||
uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; | |||
uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *E = &S[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A]; // contains secret data via randomness_s and randomness_seedSE | |||
uint8_t *randomness_s = &randomness[0]; // contains secret data | |||
uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data | |||
uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES]; | |||
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data | |||
// Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key | |||
randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A); | |||
shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A); | |||
// Generate S and E, and compute B = A*S + E. Generate A on-the-fly | |||
shake_input_seedSE[0] = 0x5F; | |||
memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES); | |||
shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(S[i]); | |||
} | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_as_plus_e(B, S, E, pk); | |||
// Encode the second part of the public key | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Add s, pk and S to the secret key | |||
memcpy(sk_s, randomness_s, CRYPTO_BYTES); | |||
memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES); | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(S[i]); | |||
} | |||
memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR); | |||
// Add H(pk) to the secret key | |||
shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { | |||
// FrodoKEM's key encapsulation | |||
const uint8_t *pk_seedA = &pk[0]; | |||
const uint8_t *pk_b = &pk[BYTES_SEED_A]; | |||
uint8_t *ct_c1 = &ct[0]; | |||
uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via mu | |||
uint8_t *pkh = &G2in[0]; | |||
uint8_t *mu = &G2in[BYTES_PKHASH]; // contains secret data | |||
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data | |||
uint8_t *seedSE = &G2out[0]; // contains secret data | |||
uint8_t *k = &G2out[CRYPTO_BYTES]; // contains secret data | |||
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k | |||
uint8_t *Fin_ct = &Fin[0]; | |||
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data | |||
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data | |||
// pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu) | |||
shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); | |||
randombytes(mu, BYTES_MU); | |||
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); | |||
// Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly | |||
shake_input_seedSE[0] = 0x96; | |||
memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES); | |||
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { | |||
Sp[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(Sp[i]); | |||
} | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Generate Epp, and compute V = Sp*B + Epp | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp); | |||
// Encode mu, and compute C = V + enc(mu) (mod q) | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(C, (uint16_t *)mu); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(C, V, C); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Compute ss = F(ct||KK) | |||
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); | |||
memcpy(Fin_k, k, CRYPTO_BYTES); | |||
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(mu, BYTES_MU); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { | |||
// FrodoKEM's key decapsulation | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
const uint8_t *ct_c1 = &ct[0]; | |||
const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; | |||
const uint8_t *sk_s = &sk[0]; | |||
const uint8_t *sk_pk = &sk[CRYPTO_BYTES]; | |||
const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; | |||
uint16_t S[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; | |||
const uint8_t *pk_seedA = &sk_pk[0]; | |||
const uint8_t *pk_b = &sk_pk[BYTES_SEED_A]; | |||
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via muprime | |||
uint8_t *pkh = &G2in[0]; | |||
uint8_t *muprime = &G2in[BYTES_PKHASH]; // contains secret data | |||
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data | |||
uint8_t *seedSEprime = &G2out[0]; // contains secret data | |||
uint8_t *kprime = &G2out[CRYPTO_BYTES]; // contains secret data | |||
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k | |||
uint8_t *Fin_ct = &Fin[0]; | |||
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data | |||
uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES]; // contains secret data | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8); | |||
} | |||
// Compute W = C - Bp*S (mod q), and decode the randomness mu | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_bs(W, Bp, S); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sub(W, C, W); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_decode((uint16_t *)muprime, W); | |||
// Generate (seedSE' || k') = G_2(pkh || mu') | |||
memcpy(pkh, sk_pkh, BYTES_PKHASH); | |||
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); | |||
// Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly | |||
shake_input_seedSEprime[0] = 0x96; | |||
memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES); | |||
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { | |||
Sp[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(Sp[i]); | |||
} | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA); | |||
// Generate Epp, and compute W = Sp*B + Epp | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp); | |||
// Encode mu, and compute CC = W + enc(mu') (mod q) | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(CC, (uint16_t *)muprime); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(CC, W, CC); | |||
// Prepare input to F | |||
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); | |||
// Reducing BBp modulo q | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
// If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s) | |||
// Needs to avoid branching on secret data as per: | |||
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum | |||
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020. | |||
int8_t selector = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR); | |||
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s) | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector); | |||
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(muprime, BYTES_MU); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} |
@@ -1,108 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: matrix arithmetic functions used by the KEM | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { | |||
// Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right. | |||
// Inputs: s, e (N x N_BAR) | |||
// Output: out = A*s + e (N x N_BAR) | |||
int j, k; | |||
uint16_t i; | |||
int16_t a_row[4 * PARAMS_N]; | |||
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { | |||
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); | |||
} | |||
uint8_t seed_A_separated[2 + BYTES_SEED_A]; | |||
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; | |||
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); | |||
for (i = 0; i < PARAMS_N; i += 4) { | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 0); | |||
shake128((unsigned char *)(a_row + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 1); | |||
shake128((unsigned char *)(a_row + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 2); | |||
shake128((unsigned char *)(a_row + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 3); | |||
shake128((unsigned char *)(a_row + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
for (k = 0; k < 4 * PARAMS_N; k++) { | |||
a_row[k] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(a_row[k]); | |||
} | |||
for (k = 0; k < PARAMS_NBAR; k++) { | |||
uint16_t sum[4] = {0}; | |||
for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication | |||
uint16_t sp = s[k * PARAMS_N + j]; | |||
sum[0] += a_row[0 * PARAMS_N + j] * sp; // Go through four lines with same s | |||
sum[1] += a_row[1 * PARAMS_N + j] * sp; | |||
sum[2] += a_row[2 * PARAMS_N + j] * sp; | |||
sum[3] += a_row[3 * PARAMS_N + j] * sp; | |||
} | |||
out[(i + 0)*PARAMS_NBAR + k] += sum[0]; | |||
out[(i + 2)*PARAMS_NBAR + k] += sum[2]; | |||
out[(i + 1)*PARAMS_NBAR + k] += sum[1]; | |||
out[(i + 3)*PARAMS_NBAR + k] += sum[3]; | |||
} | |||
} | |||
return 1; | |||
} | |||
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { | |||
// Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left. | |||
// Inputs: s', e' (N_BAR x N) | |||
// Output: out = s'*A + e' (N_BAR x N) | |||
int i, j; | |||
uint16_t kk; | |||
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { | |||
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); | |||
} | |||
int t = 0; | |||
uint16_t a_cols[4 * PARAMS_N]; | |||
int k; | |||
uint8_t seed_A_separated[2 + BYTES_SEED_A]; | |||
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; | |||
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); | |||
for (kk = 0; kk < PARAMS_N; kk += 4) { | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 0); | |||
shake128((unsigned char *)(a_cols + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 1); | |||
shake128((unsigned char *)(a_cols + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 2); | |||
shake128((unsigned char *)(a_cols + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 3); | |||
shake128((unsigned char *)(a_cols + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
for (i = 0; i < 4 * PARAMS_N; i++) { | |||
a_cols[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(a_cols[i]); | |||
} | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
uint16_t sum[PARAMS_N] = {0}; | |||
for (j = 0; j < 4; j++) { | |||
uint16_t sp = s[i * PARAMS_N + kk + j]; | |||
for (k = 0; k < PARAMS_N; k++) { // Matrix-vector multiplication | |||
sum[k] += (uint16_t)(sp * (uint32_t)a_cols[(t + j) * PARAMS_N + k]); | |||
} | |||
} | |||
for (k = 0; k < PARAMS_N; k++) { | |||
out[i * PARAMS_N + k] += sum[k]; | |||
} | |||
} | |||
} | |||
return 1; | |||
} |
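The loops above derive the rows (and columns) of A by prepending a 16-bit little-endian index to `seed_A` and expanding it with SHAKE128, four rows per batch. A hedged sketch of deriving a single row, assuming the one-shot `shake128(out, outlen, in, inlen)` helper from `fips202.h` used above:

```c
#include <stdint.h>
#include <string.h>
#include "fips202.h"

/* Derive row `i` of the public matrix A from seed_A, mirroring
 * mul_add_as_plus_e: SHAKE128(LE16(i) || seed_A) read as n uint16 values. */
static void demo_frodo_row(uint16_t *row, size_t n, uint16_t i, const uint8_t seed_A[16]) {
    uint8_t buf[2 + 16];
    buf[0] = (uint8_t)(i & 0xFF);   /* little-endian row index */
    buf[1] = (uint8_t)(i >> 8);
    memcpy(&buf[2], seed_A, 16);
    shake128((uint8_t *)row, 2 * n, buf, sizeof buf);
    /* On big-endian targets each entry would still need the LE_TO_UINT16
     * conversion applied by the original code. */
}
```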
@@ -1,35 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: noise sampling functions | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA; | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(uint16_t *s, size_t n) { | |||
// Fills vector s with n samples from the noise distribution which requires 16 bits to sample. | |||
// The distribution is specified by its CDF. | |||
// Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output. | |||
size_t i; | |||
unsigned int j; | |||
for (i = 0; i < n; ++i) { | |||
uint16_t sample = 0; | |||
uint16_t prnd = s[i] >> 1; // Drop the least significant bit | |||
uint16_t sign = s[i] & 0x1; // Pick the least significant bit | |||
// No need to compare with the last value. | |||
for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) { | |||
// Constant time comparison: 1 if CDF_TABLE[j] < s, 0 otherwise. Uses the fact that CDF_TABLE[j] and s fit in 15 bits. | |||
sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15; | |||
} | |||
// Assuming that sign is either 0 or 1, flips sample iff sign = 1 | |||
s[i] = ((-sign) ^ sample) + sign; | |||
} | |||
} |
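The branch-free sampler above inverts the CDF with a subtract-and-shift comparison: `(CDF_TABLE[j] - prnd) >> 15` is 1 exactly when `CDF_TABLE[j] < prnd` (both fit in 15 bits), so summing over the table yields the sampled magnitude without data-dependent branches. A tiny standalone illustration with a toy table (not the real FrodoKEM distribution):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uint16_t cdf[3] = {100, 200, 300};  /* hypothetical 15-bit CDF table */
    uint16_t prnd = 250;                      /* pseudo-random value, top bit clear */
    uint16_t sample = 0;
    for (int j = 0; j < 3; j++) {
        sample += (uint16_t)(cdf[j] - prnd) >> 15;  /* adds 1 iff cdf[j] < prnd */
    }
    printf("sample = %u\n", sample);  /* prints 2: entries 100 and 200 lie below 250 */
    return 0;
}
```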
@@ -1,27 +0,0 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES | |||
#define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES | |||
#define CRYPTO_BYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_BYTES | |||
#define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES | |||
#define PARAMS_N 1344 | |||
#define PARAMS_NBAR 8 | |||
#define PARAMS_LOGQ 16 | |||
#define PARAMS_Q (1 << PARAMS_LOGQ) | |||
#define PARAMS_EXTRACTED_BITS 4 | |||
#define PARAMS_STRIPE_STEP 8 | |||
#define PARAMS_PARALLEL 4 | |||
#define BYTES_SEED_A 16 | |||
#define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8) | |||
#define BYTES_PKHASH CRYPTO_BYTES | |||
// Selecting SHAKE XOF function for the KEM and noise sampling | |||
#define shake shake256 | |||
// CDF table | |||
#define CDF_TABLE_DATA {9142, 23462, 30338, 32361, 32725, 32765, 32767} | |||
#define CDF_TABLE_LEN 7 | |||
#endif |
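As a quick cross-check, the constants in the removed api.h follow directly from these parameters: `CRYPTO_PUBLICKEYBYTES = BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 = 16 + 21504 = 21520`; `CRYPTO_CIPHERTEXTBYTES = 21504 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8 = 21504 + 128 = 21632`; `CRYPTO_SECRETKEYBYTES = CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH = 32 + 21520 + 21504 + 32 = 43088`; and `BYTES_MU = (4*8*8)/8 = 32` bytes of encapsulated randomness.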
@@ -1,264 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: additional functions for FrodoKEM | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
static inline uint8_t min(uint8_t x, uint8_t y) { | |||
if (x < y) { | |||
return x; | |||
} | |||
return y; | |||
} | |||
uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(uint16_t n) { | |||
return (((uint8_t *) &n)[0] | (((uint8_t *) &n)[1] << 8)); | |||
} | |||
uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(uint16_t n) { | |||
uint16_t y; | |||
uint8_t *z = (uint8_t *) &y; | |||
z[0] = n & 0xFF; | |||
z[1] = (n & 0xFF00) >> 8; | |||
return y; | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) { | |||
// Multiply by s on the right | |||
// Inputs: b (N_BAR x N), s (N x N_BAR) | |||
// Output: out = b*s (N_BAR x N_BAR) | |||
int i, j, k; | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
for (j = 0; j < PARAMS_NBAR; j++) { | |||
out[i * PARAMS_NBAR + j] = 0; | |||
for (k = 0; k < PARAMS_N; k++) { | |||
out[i * PARAMS_NBAR + j] += (uint16_t)(b[i * PARAMS_N + k] * (uint32_t)s[j * PARAMS_N + k]); | |||
} | |||
out[i * PARAMS_NBAR + j] = (uint32_t)(out[i * PARAMS_NBAR + j]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) { | |||
// Multiply by s on the left | |||
// Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR) | |||
// Output: out = s*b + e (N_BAR x N_BAR) | |||
int i, j, k; | |||
for (k = 0; k < PARAMS_NBAR; k++) { | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
out[k * PARAMS_NBAR + i] = e[k * PARAMS_NBAR + i]; | |||
for (j = 0; j < PARAMS_N; j++) { | |||
out[k * PARAMS_NBAR + i] += (uint16_t)(s[k * PARAMS_N + j] * (uint32_t)b[j * PARAMS_NBAR + i]); | |||
} | |||
out[k * PARAMS_NBAR + i] = (uint32_t)(out[k * PARAMS_NBAR + i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) { | |||
// Add a and b | |||
// Inputs: a, b (N_BAR x N_BAR) | |||
// Output: c = a + b | |||
for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) { | |||
out[i] = (a[i] + b[i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) { | |||
// Subtract a and b | |||
// Inputs: a, b (N_BAR x N_BAR) | |||
// Output: c = a - b | |||
for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) { | |||
out[i] = (a[i] - b[i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in) { | |||
// Encoding | |||
unsigned int i, j, npieces_word = 8; | |||
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8; | |||
uint64_t temp, mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1; | |||
uint16_t *pos = out; | |||
for (i = 0; i < nwords; i++) { | |||
temp = 0; | |||
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) { | |||
temp |= ((uint64_t)((uint8_t *)in)[i * PARAMS_EXTRACTED_BITS + j]) << (8 * j); | |||
} | |||
for (j = 0; j < npieces_word; j++) { | |||
*pos = (uint16_t)((temp & mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)); | |||
temp >>= PARAMS_EXTRACTED_BITS; | |||
pos++; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in) { | |||
// Decoding | |||
unsigned int i, j, index = 0, npieces_word = 8; | |||
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8; | |||
uint16_t temp, maskex = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1, maskq = ((uint16_t)1 << PARAMS_LOGQ) - 1; | |||
uint8_t *pos = (uint8_t *)out; | |||
uint64_t templong; | |||
for (i = 0; i < nwords; i++) { | |||
templong = 0; | |||
for (j = 0; j < npieces_word; j++) { // temp = floor(in*2^{-11}+0.5) | |||
temp = ((in[index] & maskq) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS); | |||
templong |= ((uint64_t)(temp & maskex)) << (PARAMS_EXTRACTED_BITS * j); | |||
index++; | |||
} | |||
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) { | |||
pos[i * PARAMS_EXTRACTED_BITS + j] = (templong >> (8 * j)) & 0xFF; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) { | |||
// Pack the input uint16 vector into a char output vector, copying lsb bits from each input element. | |||
// If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied. | |||
memset(out, 0, outlen); | |||
size_t i = 0; // whole bytes already filled in | |||
size_t j = 0; // whole uint16_t already copied | |||
uint16_t w = 0; // the leftover, not yet copied | |||
uint8_t bits = 0; // the number of lsb in w | |||
while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { | |||
/* | |||
in: | | |********|********| | |||
^ | |||
j | |||
w : | ****| | |||
^ | |||
bits | |||
out:|**|**|**|**|**|**|**|**|* | | |||
^^ | |||
ib | |||
*/ | |||
uint8_t b = 0; // bits in out[i] already filled in | |||
while (b < 8) { | |||
int nbits = min(8 - b, bits); | |||
uint16_t mask = (1 << nbits) - 1; | |||
uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask); // the bits to copy from w to out | |||
out[i] = out[i] + (t << (8 - b - nbits)); | |||
b += (uint8_t) nbits; | |||
bits -= (uint8_t) nbits; | |||
w &= ~(mask << bits); // not strictly necessary; mostly for debugging | |||
if (bits == 0) { | |||
if (j < inlen) { | |||
w = in[j]; | |||
bits = lsb; | |||
j++; | |||
} else { | |||
break; // the input vector is exhausted | |||
} | |||
} | |||
} | |||
if (b == 8) { // out[i] is filled in | |||
i++; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) { | |||
// Unpack the input char vector into a uint16_t output vector, copying lsb bits | |||
// for each output element from input. outlen must be at least ceil(inlen * 8 / lsb). | |||
memset(out, 0, outlen * sizeof(uint16_t)); | |||
size_t i = 0; // whole uint16_t already filled in | |||
size_t j = 0; // whole bytes already copied | |||
uint8_t w = 0; // the leftover, not yet copied | |||
uint8_t bits = 0; // the number of lsb bits of w | |||
while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { | |||
/* | |||
in: | | | | | | |**|**|... | |||
^ | |||
j | |||
w : | *| | |||
^ | |||
bits | |||
out:| *****| *****| *** | |... | |||
^ ^ | |||
i b | |||
*/ | |||
uint8_t b = 0; // bits in out[i] already filled in | |||
while (b < lsb) { | |||
int nbits = min(lsb - b, bits); | |||
uint16_t mask = (1 << nbits) - 1; | |||
uint8_t t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out | |||
out[i] = out[i] + (t << (lsb - b - nbits)); | |||
b += (uint8_t) nbits; | |||
bits -= (uint8_t) nbits; | |||
w &= ~(mask << bits); // not strictly necessary; mostly for debugging | |||
if (bits == 0) { | |||
if (j < inlen) { | |||
w = in[j]; | |||
bits = 8; | |||
j++; | |||
} else { | |||
break; // the input vector is exhausted | |||
} | |||
} | |||
} | |||
if (b == lsb) { // out[i] is filled in | |||
i++; | |||
} | |||
} | |||
} | |||
int8_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) { | |||
// Compare two arrays in constant time. | |||
// Returns 0 if the byte arrays are equal, -1 otherwise. | |||
uint16_t r = 0; | |||
for (size_t i = 0; i < len; i++) { | |||
r |= a[i] ^ b[i]; | |||
} | |||
r = (-(int16_t)(r >> 1) | -(int16_t)(r & 1)) >> (8 * sizeof(uint16_t) -1); | |||
return (int8_t)r; | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) { | |||
// Select one of the two input arrays to be moved to r | |||
// If (selector == 0) then load r with a, else if (selector == -1) load r with b | |||
for (size_t i = 0; i < len; i++) { | |||
r[i] = (~selector & a[i]) | (selector & b[i]); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n) { | |||
// Clear 8-bit bytes from memory. "n" indicates the number of bytes to be zeroed. | |||
// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. | |||
volatile uint8_t *v = mem; | |||
for (size_t i = 0; i < n; i++) { | |||
v[i] = 0; | |||
} | |||
} |
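The `ct_verify` / `ct_select` pair above is what lets decapsulation pick between k' and the secret s without branching on secret data: the verify result is 0 on equality and -1 otherwise, and that value is used directly as a byte mask. A self-contained toy mirroring those semantics (the `demo_*` helpers are illustrative, not the library's API):

```c
#include <stdint.h>
#include <stdio.h>

/* 0 if the arrays are equal, -1 otherwise (no secret-dependent branches). */
static int8_t demo_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
    uint16_t r = 0;
    for (size_t i = 0; i < len; i++) {
        r |= a[i] ^ b[i];
    }
    return (int8_t)((-(int16_t)(r >> 1) | -(int16_t)(r & 1)) >> 15);
}

/* selector == 0 selects a; selector == -1 (all-ones mask) selects b. */
static void demo_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b,
                           size_t len, int8_t selector) {
    for (size_t i = 0; i < len; i++) {
        r[i] = (uint8_t)((~selector & a[i]) | (selector & b[i]));
    }
}

int main(void) {
    uint16_t x[2] = {1, 2}, y[2] = {1, 3};
    uint8_t k_good[4] = {0xAA, 0xAA, 0xAA, 0xAA};
    uint8_t k_fallback[4] = {0x55, 0x55, 0x55, 0x55};
    uint8_t out[4];
    demo_ct_select(out, k_good, k_fallback, sizeof out, demo_ct_verify(x, y, 2));
    printf("selected 0x%02x\n", out[0]);  /* 0x55: the arrays differ, fallback wins */
    return 0;
}
```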
@@ -1,10 +0,0 @@ | |||
set( | |||
SRC_CLEAN_FRODOKEM640SHAKE | |||
kem.c | |||
matrix_shake.c | |||
noise.c | |||
util.c | |||
) | |||
define_kem_alg(frodo640shake_clean | |||
PQCLEAN_FRODOKEM640SHAKE_OPT "${SRC_CLEAN_FRODOKEM640SHAKE}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -1,20 +0,0 @@ | |||
#ifndef PQCLEAN_FRODOKEM640SHAKE_CLEAN_API_H | |||
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_API_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES 19888 // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH | |||
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES 9616 // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 | |||
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_BYTES 16 | |||
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES 9720 // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8 | |||
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_ALGNAME "FrodoKEM-640-SHAKE" | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); | |||
#endif |
@@ -1,21 +0,0 @@ | |||
#ifndef COMMON_H | |||
#define COMMON_H | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(uint16_t *s, size_t n); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb); | |||
int8_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector); | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n); | |||
uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(uint16_t n); | |||
uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(uint16_t n); | |||
#endif |
@@ -1,237 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: Key Encapsulation Mechanism (KEM) based on Frodo | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "randombytes.h" | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
// FrodoKEM's key generation | |||
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes) | |||
// secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes) | |||
uint8_t *pk_seedA = &pk[0]; | |||
uint8_t *pk_b = &pk[BYTES_SEED_A]; | |||
uint8_t *sk_s = &sk[0]; | |||
uint8_t *sk_pk = &sk[CRYPTO_BYTES]; | |||
uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; | |||
uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *E = &S[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A]; // contains secret data via randomness_s and randomness_seedSE | |||
uint8_t *randomness_s = &randomness[0]; // contains secret data | |||
uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data | |||
uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES]; | |||
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data | |||
// Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key | |||
randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A); | |||
shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A); | |||
// Generate S and E, and compute B = A*S + E. Generate A on-the-fly | |||
shake_input_seedSE[0] = 0x5F; | |||
memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES); | |||
shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(S[i]); | |||
} | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_as_plus_e(B, S, E, pk); | |||
// Encode the second part of the public key | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Add s, pk and S to the secret key | |||
memcpy(sk_s, randomness_s, CRYPTO_BYTES); | |||
memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES); | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(S[i]); | |||
} | |||
memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR); | |||
// Add H(pk) to the secret key | |||
shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { | |||
// FrodoKEM's key encapsulation | |||
const uint8_t *pk_seedA = &pk[0]; | |||
const uint8_t *pk_b = &pk[BYTES_SEED_A]; | |||
uint8_t *ct_c1 = &ct[0]; | |||
uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via mu | |||
uint8_t *pkh = &G2in[0]; | |||
uint8_t *mu = &G2in[BYTES_PKHASH]; // contains secret data | |||
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data | |||
uint8_t *seedSE = &G2out[0]; // contains secret data | |||
uint8_t *k = &G2out[CRYPTO_BYTES]; // contains secret data | |||
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k | |||
uint8_t *Fin_ct = &Fin[0]; | |||
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data | |||
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data | |||
// pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu) | |||
shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); | |||
randombytes(mu, BYTES_MU); | |||
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); | |||
// Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly | |||
shake_input_seedSE[0] = 0x96; | |||
memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES); | |||
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { | |||
Sp[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(Sp[i]); | |||
} | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Generate Epp, and compute V = Sp*B + Epp | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp); | |||
// Encode mu, and compute C = V + enc(mu) (mod q) | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(C, (uint16_t *)mu); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(C, V, C); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Compute ss = F(ct||KK) | |||
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); | |||
memcpy(Fin_k, k, CRYPTO_BYTES); | |||
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(mu, BYTES_MU); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { | |||
// FrodoKEM's key decapsulation | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
const uint8_t *ct_c1 = &ct[0]; | |||
const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; | |||
const uint8_t *sk_s = &sk[0]; | |||
const uint8_t *sk_pk = &sk[CRYPTO_BYTES]; | |||
const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; | |||
uint16_t S[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; | |||
const uint8_t *pk_seedA = &sk_pk[0]; | |||
const uint8_t *pk_b = &sk_pk[BYTES_SEED_A]; | |||
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via muprime | |||
uint8_t *pkh = &G2in[0]; | |||
uint8_t *muprime = &G2in[BYTES_PKHASH]; // contains secret data | |||
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data | |||
uint8_t *seedSEprime = &G2out[0]; // contains secret data | |||
uint8_t *kprime = &G2out[CRYPTO_BYTES]; // contains secret data | |||
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k | |||
uint8_t *Fin_ct = &Fin[0]; | |||
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data | |||
uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES]; // contains secret data | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8); | |||
} | |||
// Compute W = C - Bp*S (mod q), and decode the randomness mu | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_bs(W, Bp, S); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sub(W, C, W); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_decode((uint16_t *)muprime, W); | |||
// Generate (seedSE' || k') = G_2(pkh || mu') | |||
memcpy(pkh, sk_pkh, BYTES_PKHASH); | |||
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); | |||
// Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly | |||
shake_input_seedSEprime[0] = 0x96; | |||
memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES); | |||
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { | |||
Sp[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(Sp[i]); | |||
} | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA); | |||
// Generate Epp, and compute W = Sp*B + Epp | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp); | |||
// Encode mu, and compute CC = W + enc(mu') (mod q) | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(CC, (uint16_t *)muprime); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(CC, W, CC); | |||
// Prepare input to F | |||
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); | |||
// Reducing BBp modulo q | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
// If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s) | |||
// Needs to avoid branching on secret data as per: | |||
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum | |||
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020. | |||
int8_t selector = PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR); | |||
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s) | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector); | |||
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(muprime, BYTES_MU); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} |
@@ -1,108 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: matrix arithmetic functions used by the KEM | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { | |||
// Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right. | |||
// Inputs: s, e (N x N_BAR) | |||
// Output: out = A*s + e (N x N_BAR) | |||
int j, k; | |||
uint16_t i; | |||
int16_t a_row[4 * PARAMS_N]; | |||
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { | |||
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); | |||
} | |||
uint8_t seed_A_separated[2 + BYTES_SEED_A]; | |||
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; | |||
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); | |||
for (i = 0; i < PARAMS_N; i += 4) { | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 0); | |||
shake128((unsigned char *)(a_row + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 1); | |||
shake128((unsigned char *)(a_row + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 2); | |||
shake128((unsigned char *)(a_row + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 3); | |||
shake128((unsigned char *)(a_row + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
for (k = 0; k < 4 * PARAMS_N; k++) { | |||
a_row[k] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(a_row[k]); | |||
} | |||
for (k = 0; k < PARAMS_NBAR; k++) { | |||
uint16_t sum[4] = {0}; | |||
for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication | |||
uint16_t sp = s[k * PARAMS_N + j]; | |||
sum[0] += a_row[0 * PARAMS_N + j] * sp; // Process four rows of A with the same s | |||
sum[1] += a_row[1 * PARAMS_N + j] * sp; | |||
sum[2] += a_row[2 * PARAMS_N + j] * sp; | |||
sum[3] += a_row[3 * PARAMS_N + j] * sp; | |||
} | |||
out[(i + 0)*PARAMS_NBAR + k] += sum[0]; | |||
out[(i + 2)*PARAMS_NBAR + k] += sum[2]; | |||
out[(i + 1)*PARAMS_NBAR + k] += sum[1]; | |||
out[(i + 3)*PARAMS_NBAR + k] += sum[3]; | |||
} | |||
} | |||
return 1; | |||
} | |||
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { | |||
// Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left. | |||
// Inputs: s', e' (N_BAR x N) | |||
// Output: out = s'*A + e' (N_BAR x N) | |||
int i, j; | |||
uint16_t kk; | |||
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { | |||
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); | |||
} | |||
int t = 0; | |||
uint16_t a_cols[4 * PARAMS_N]; | |||
int k; | |||
uint8_t seed_A_separated[2 + BYTES_SEED_A]; | |||
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; | |||
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); | |||
for (kk = 0; kk < PARAMS_N; kk += 4) { | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 0); | |||
shake128((unsigned char *)(a_cols + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 1); | |||
shake128((unsigned char *)(a_cols + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 2); | |||
shake128((unsigned char *)(a_cols + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 3); | |||
shake128((unsigned char *)(a_cols + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
for (i = 0; i < 4 * PARAMS_N; i++) { | |||
a_cols[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(a_cols[i]); | |||
} | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
uint16_t sum[PARAMS_N] = {0}; | |||
for (j = 0; j < 4; j++) { | |||
uint16_t sp = s[i * PARAMS_N + kk + j]; | |||
for (k = 0; k < PARAMS_N; k++) { // Matrix-vector multiplication | |||
sum[k] += (uint16_t)(sp * (uint32_t)a_cols[(t + j) * PARAMS_N + k]); | |||
} | |||
} | |||
for (k = 0; k < PARAMS_N; k++) { | |||
out[i * PARAMS_N + k] += sum[k]; | |||
} | |||
} | |||
} | |||
return 1; | |||
} |
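Each block of four rows (or columns) of A is expanded by calling `shake128` on an 18-byte input: a 2-byte little-endian row counter followed by `seed_A`. The snippet below is a standalone sketch of how that input block is assembled; the SHAKE call itself is omitted, since it lives in `fips202.h`, and `put_le16` is a hypothetical helper mirroring `UINT16_TO_LE`.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BYTES_SEED_A 16  /* matches params.h */

/* Hypothetical helper: store a 16-bit counter in little-endian order,
 * as UINT16_TO_LE does in util.c. */
static void put_le16(uint8_t out[2], uint16_t v) {
    out[0] = (uint8_t)(v & 0xFF);
    out[1] = (uint8_t)(v >> 8);
}

int main(void) {
    uint8_t seed_A[BYTES_SEED_A] = {0};      /* placeholder seed */
    uint8_t block[2 + BYTES_SEED_A];

    memcpy(&block[2], seed_A, BYTES_SEED_A); /* the seed part is fixed for all rows */
    for (uint16_t row = 0; row < 4; row++) {
        put_le16(block, row);                /* 2-byte row counter || seed_A */
        /* In matrix_shake.c this block would now be fed to shake128(...) with an
         * output length of 2 * PARAMS_N bytes to expand one row of A; here we
         * only show the input layout used for domain separation. */
        printf("row %u input starts with %02x %02x\n",
               (unsigned)row, (unsigned)block[0], (unsigned)block[1]);
    }
    return 0;
}
```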
@@ -1,35 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: noise sampling functions | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA; | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(uint16_t *s, size_t n) { | |||
// Fills vector s with n samples from the noise distribution which requires 16 bits to sample. | |||
// The distribution is specified by its CDF. | |||
// Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output. | |||
size_t i; | |||
unsigned int j; | |||
for (i = 0; i < n; ++i) { | |||
uint16_t sample = 0; | |||
uint16_t prnd = s[i] >> 1; // Drop the least significant bit | |||
uint16_t sign = s[i] & 0x1; // Pick the least significant bit | |||
// No need to compare with the last value. | |||
for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) { | |||
// Constant time comparison: 1 if CDF_TABLE[j] < prnd, 0 otherwise. Uses the fact that CDF_TABLE[j] and prnd fit in 15 bits. | |||
sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15; | |||
} | |||
// Assuming that sign is either 0 or 1, negates sample iff sign = 1 | |||
s[i] = ((-sign) ^ sample) + sign; | |||
} | |||
} |
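`sample_n` draws each coefficient by inversion sampling against the cumulative distribution table: it counts how many CDF entries lie below a 15-bit uniform value and then applies a random sign. Below is a self-contained single-coefficient sketch using the Frodo-640 CDF table from the params.h shown next; `sample_one` is an illustrative helper, not a library function.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Frodo-640 CDF table, copied from params.h. */
static const uint16_t CDF_TABLE[13] = {
    4643, 13363, 20579, 25843, 29227, 31145, 32103,
    32525, 32689, 32745, 32762, 32766, 32767
};

/* Illustrative helper: the same inversion-sampling step as sample_n,
 * applied to a single 16-bit pseudo-random word. */
static int16_t sample_one(uint16_t x) {
    uint16_t prnd = x >> 1;   /* 15-bit uniform value */
    uint16_t sign = x & 1;    /* random sign bit */
    uint16_t sample = 0;
    for (unsigned j = 0; j < 13 - 1; j++) {               /* last entry never compared */
        sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15;  /* +1 iff CDF_TABLE[j] < prnd */
    }
    return (int16_t)(((0 - sign) ^ sample) + sign);       /* negate iff sign == 1 */
}

int main(void) {
    /* prnd = 0 always maps to 0; larger prnd values give larger magnitudes,
     * and the low bit of the input only selects the sign. */
    uint16_t inputs[] = {0x0000, 0x2468, 0x2469, 0xFFFE, 0xFFFF};
    for (size_t i = 0; i < sizeof inputs / sizeof inputs[0]; i++) {
        printf("x=0x%04X -> sample=%d\n", (unsigned)inputs[i], (int)sample_one(inputs[i]));
    }
    return 0;
}
```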
@@ -1,27 +0,0 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES | |||
#define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES | |||
#define CRYPTO_BYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_BYTES | |||
#define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES | |||
#define PARAMS_N 640 | |||
#define PARAMS_NBAR 8 | |||
#define PARAMS_LOGQ 15 | |||
#define PARAMS_Q (1 << PARAMS_LOGQ) | |||
#define PARAMS_EXTRACTED_BITS 2 | |||
#define PARAMS_STRIPE_STEP 8 | |||
#define PARAMS_PARALLEL 4 | |||
#define BYTES_SEED_A 16 | |||
#define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8) | |||
#define BYTES_PKHASH CRYPTO_BYTES | |||
// Selecting SHAKE XOF function for the KEM and noise sampling | |||
#define shake shake128 | |||
// CDF table | |||
#define CDF_TABLE_DATA {4643, 13363, 20579, 25843, 29227, 31145, 32103, 32525, 32689, 32745, 32762, 32766, 32767} | |||
#define CDF_TABLE_LEN 13 | |||
#endif |
@@ -1,264 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: additional functions for FrodoKEM | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
static inline uint8_t min(uint8_t x, uint8_t y) { | |||
if (x < y) { | |||
return x; | |||
} | |||
return y; | |||
} | |||
uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(uint16_t n) { | |||
return (((uint8_t *) &n)[0] | (((uint8_t *) &n)[1] << 8)); | |||
} | |||
uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(uint16_t n) { | |||
uint16_t y; | |||
uint8_t *z = (uint8_t *) &y; | |||
z[0] = n & 0xFF; | |||
z[1] = (n & 0xFF00) >> 8; | |||
return y; | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) { | |||
// Multiply by s on the right | |||
// Inputs: b (N_BAR x N), s (N x N_BAR) | |||
// Output: out = b*s (N_BAR x N_BAR) | |||
int i, j, k; | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
for (j = 0; j < PARAMS_NBAR; j++) { | |||
out[i * PARAMS_NBAR + j] = 0; | |||
for (k = 0; k < PARAMS_N; k++) { | |||
out[i * PARAMS_NBAR + j] += (uint16_t)(b[i * PARAMS_N + k] * (uint32_t)s[j * PARAMS_N + k]); | |||
} | |||
out[i * PARAMS_NBAR + j] = (uint32_t)(out[i * PARAMS_NBAR + j]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) { | |||
// Multiply by s on the left | |||
// Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR) | |||
// Output: out = s*b + e (N_BAR x N_BAR) | |||
int i, j, k; | |||
for (k = 0; k < PARAMS_NBAR; k++) { | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
out[k * PARAMS_NBAR + i] = e[k * PARAMS_NBAR + i]; | |||
for (j = 0; j < PARAMS_N; j++) { | |||
out[k * PARAMS_NBAR + i] += (uint16_t)(s[k * PARAMS_N + j] * (uint32_t)b[j * PARAMS_NBAR + i]); | |||
} | |||
out[k * PARAMS_NBAR + i] = (uint32_t)(out[k * PARAMS_NBAR + i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) { | |||
// Add a and b | |||
// Inputs: a, b (N_BAR x N_BAR) | |||
// Output: c = a + b | |||
for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) { | |||
out[i] = (a[i] + b[i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) { | |||
// Subtract a and b | |||
// Inputs: a, b (N_BAR x N_BAR) | |||
// Output: c = a - b | |||
for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) { | |||
out[i] = (a[i] - b[i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in) { | |||
// Encoding | |||
unsigned int i, j, npieces_word = 8; | |||
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8; | |||
uint64_t temp, mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1; | |||
uint16_t *pos = out; | |||
for (i = 0; i < nwords; i++) { | |||
temp = 0; | |||
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) { | |||
temp |= ((uint64_t)((uint8_t *)in)[i * PARAMS_EXTRACTED_BITS + j]) << (8 * j); | |||
} | |||
for (j = 0; j < npieces_word; j++) { | |||
*pos = (uint16_t)((temp & mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)); | |||
temp >>= PARAMS_EXTRACTED_BITS; | |||
pos++; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in) { | |||
// Decoding | |||
unsigned int i, j, index = 0, npieces_word = 8; | |||
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8; | |||
uint16_t temp, maskex = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1, maskq = ((uint16_t)1 << PARAMS_LOGQ) - 1; | |||
uint8_t *pos = (uint8_t *)out; | |||
uint64_t templong; | |||
for (i = 0; i < nwords; i++) { | |||
templong = 0; | |||
for (j = 0; j < npieces_word; j++) { // temp = floor(in * 2^{-(PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)} + 0.5) | |||
temp = ((in[index] & maskq) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS); | |||
templong |= ((uint64_t)(temp & maskex)) << (PARAMS_EXTRACTED_BITS * j); | |||
index++; | |||
} | |||
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) { | |||
pos[i * PARAMS_EXTRACTED_BITS + j] = (templong >> (8 * j)) & 0xFF; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) { | |||
// Pack the input uint16 vector into a char output vector, copying lsb bits from each input element. | |||
// If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied. | |||
memset(out, 0, outlen); | |||
size_t i = 0; // whole bytes already filled in | |||
size_t j = 0; // whole uint16_t already copied | |||
uint16_t w = 0; // the leftover, not yet copied | |||
uint8_t bits = 0; // the number of lsb in w | |||
while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { | |||
/* | |||
in: | | |********|********| | |||
^ | |||
j | |||
w : | ****| | |||
^ | |||
bits | |||
out:|**|**|**|**|**|**|**|**|* | | |||
^^ | |||
ib | |||
*/ | |||
uint8_t b = 0; // bits in out[i] already filled in | |||
while (b < 8) { | |||
int nbits = min(8 - b, bits); | |||
uint16_t mask = (1 << nbits) - 1; | |||
uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask); // the bits to copy from w to out | |||
out[i] = out[i] + (t << (8 - b - nbits)); | |||
b += (uint8_t) nbits; | |||
bits -= (uint8_t) nbits; | |||
w &= ~(mask << bits); // not strictly necessary; mostly for debugging | |||
if (bits == 0) { | |||
if (j < inlen) { | |||
w = in[j]; | |||
bits = lsb; | |||
j++; | |||
} else { | |||
break; // the input vector is exhausted | |||
} | |||
} | |||
} | |||
if (b == 8) { // out[i] is filled in | |||
i++; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) { | |||
// Unpack the input char vector into a uint16_t output vector, copying lsb bits | |||
// for each output element from input. outlen must be at least ceil(inlen * 8 / lsb). | |||
memset(out, 0, outlen * sizeof(uint16_t)); | |||
size_t i = 0; // whole uint16_t already filled in | |||
size_t j = 0; // whole bytes already copied | |||
uint8_t w = 0; // the leftover, not yet copied | |||
uint8_t bits = 0; // the number of lsb bits of w | |||
while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { | |||
/* | |||
in: | | | | | | |**|**|... | |||
^ | |||
j | |||
w : | *| | |||
^ | |||
bits | |||
out:| *****| *****| *** | |... | |||
^ ^ | |||
i b | |||
*/ | |||
uint8_t b = 0; // bits in out[i] already filled in | |||
while (b < lsb) { | |||
int nbits = min(lsb - b, bits); | |||
uint16_t mask = (1 << nbits) - 1; | |||
uint8_t t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out | |||
out[i] = out[i] + (t << (lsb - b - nbits)); | |||
b += (uint8_t) nbits; | |||
bits -= (uint8_t) nbits; | |||
w &= ~(mask << bits); // not strictly necessary; mostly for debugging | |||
if (bits == 0) { | |||
if (j < inlen) { | |||
w = in[j]; | |||
bits = 8; | |||
j++; | |||
} else { | |||
break; // the input vector is exhausted | |||
} | |||
} | |||
} | |||
if (b == lsb) { // out[i] is filled in | |||
i++; | |||
} | |||
} | |||
} | |||
int8_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) { | |||
// Compare two arrays in constant time. | |||
// Returns 0 if the input arrays are equal, -1 otherwise. | |||
uint16_t r = 0; | |||
for (size_t i = 0; i < len; i++) { | |||
r |= a[i] ^ b[i]; | |||
} | |||
r = (-(int16_t)(r >> 1) | -(int16_t)(r & 1)) >> (8 * sizeof(uint16_t) - 1); | |||
return (int8_t)r; | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) { | |||
// Select one of the two input arrays to be moved to r | |||
// If (selector == 0) then load r with a, else if (selector == -1) load r with b | |||
for (size_t i = 0; i < len; i++) { | |||
r[i] = (~selector & a[i]) | (selector & b[i]); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n) { | |||
// Clear 8-bit bytes from memory. "n" indicates the number of bytes to be zeroed. | |||
// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. | |||
volatile uint8_t *v = mem; | |||
for (size_t i = 0; i < n; i++) { | |||
v[i] = 0; | |||
} | |||
} |
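`key_encode` places each B = PARAMS_EXTRACTED_BITS-bit symbol into the top bits of a log2(q)-bit coefficient, and `key_decode` recovers it by rounding, so any additive error smaller in magnitude than q / 2^(B+1) is absorbed. Below is a per-coefficient sketch with the Frodo-640 parameters (LOGQ = 15, B = 2, taken from params.h above); `enc_coeff` and `dec_coeff` are illustrative helpers, not library functions.

```c
#include <stdint.h>
#include <stdio.h>

#define PARAMS_LOGQ 15            /* Frodo-640, from params.h above */
#define PARAMS_EXTRACTED_BITS 2
#define SHIFT (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)

/* Illustrative per-coefficient versions of the maps inside key_encode/key_decode. */
static uint16_t enc_coeff(uint16_t bits2) {   /* bits2 in {0, 1, 2, 3} */
    return (uint16_t)((bits2 << SHIFT) & ((1 << PARAMS_LOGQ) - 1));
}

static uint16_t dec_coeff(uint16_t c) {       /* rounding decoder */
    uint16_t maskq = (uint16_t)((1 << PARAMS_LOGQ) - 1);
    return (uint16_t)((((c & maskq) + (1 << (SHIFT - 1))) >> SHIFT) & 3);
}

int main(void) {
    /* Each 2-bit symbol survives any additive error of magnitude below
     * q / 2^(B+1) = 2^(SHIFT-1) = 4096; here 1000 is used as an example error. */
    for (uint16_t m = 0; m < 4; m++) {
        uint16_t c = enc_coeff(m);
        uint16_t noisy = (uint16_t)((c + 1000) & ((1 << PARAMS_LOGQ) - 1));
        printf("m=%u enc=%5u dec(enc)=%u dec(enc+1000)=%u\n",
               (unsigned)m, (unsigned)c, (unsigned)dec_coeff(c), (unsigned)dec_coeff(noisy));
    }
    return 0;
}
```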
@@ -1,10 +0,0 @@ | |||
set( | |||
SRC_CLEAN_FRODOKEM976SHAKE | |||
kem.c | |||
matrix_shake.c | |||
noise.c | |||
util.c | |||
) | |||
define_kem_alg(frodo976shake_clean | |||
PQCLEAN_FRODOKEM976SHAKE_OPT "${SRC_CLEAN_FRODOKEM976SHAKE}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -1,20 +0,0 @@ | |||
#ifndef PQCLEAN_FRODOKEM976SHAKE_CLEAN_API_H | |||
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_API_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES 31296 // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH | |||
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES 15632 // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 | |||
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_BYTES 24 | |||
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES 15744 // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8 | |||
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_ALGNAME "FrodoKEM-976-SHAKE" | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); | |||
#endif |
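For reference, a minimal round-trip through the three API functions declared above might look like the sketch below. It assumes the FrodoKEM-976-SHAKE implementation (and a `randombytes` provider) is compiled and linked, and keeps error handling to a bare minimum.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "api.h"   /* the PQCLEAN_FRODOKEM976SHAKE_CLEAN_* declarations above */

int main(void) {
    uint8_t *pk = malloc(PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES);
    uint8_t *sk = malloc(PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES);
    uint8_t *ct = malloc(PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES);
    uint8_t ss_enc[PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_BYTES];
    uint8_t ss_dec[PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_BYTES];

    if (pk == NULL || sk == NULL || ct == NULL) {
        return 1;
    }
    /* Generate a keypair, encapsulate against pk, decapsulate with sk. */
    PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_keypair(pk, sk);
    PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_enc(ct, ss_enc, pk);
    PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_dec(ss_dec, ct, sk);

    printf("shared secrets %s\n",
           memcmp(ss_enc, ss_dec, sizeof ss_enc) == 0 ? "match" : "differ");
    free(pk);
    free(sk);
    free(ct);
    return 0;
}
```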
@@ -1,21 +0,0 @@ | |||
#ifndef COMMON_H | |||
#define COMMON_H | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(uint16_t *s, size_t n); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb); | |||
int8_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector); | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n); | |||
uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(uint16_t n); | |||
uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(uint16_t n); | |||
#endif |
@@ -1,237 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: Key Encapsulation Mechanism (KEM) based on Frodo | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "randombytes.h" | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
// FrodoKEM's key generation | |||
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes) | |||
// secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes) | |||
uint8_t *pk_seedA = &pk[0]; | |||
uint8_t *pk_b = &pk[BYTES_SEED_A]; | |||
uint8_t *sk_s = &sk[0]; | |||
uint8_t *sk_pk = &sk[CRYPTO_BYTES]; | |||
uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; | |||
uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *E = &S[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A]; // contains secret data via randomness_s and randomness_seedSE | |||
uint8_t *randomness_s = &randomness[0]; // contains secret data | |||
uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data | |||
uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES]; | |||
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data | |||
// Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key | |||
randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A); | |||
shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A); | |||
// Generate S and E, and compute B = A*S + E. Generate A on-the-fly | |||
shake_input_seedSE[0] = 0x5F; | |||
memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES); | |||
shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(S[i]); | |||
} | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_as_plus_e(B, S, E, pk); | |||
// Encode the second part of the public key | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Add s, pk and S to the secret key | |||
memcpy(sk_s, randomness_s, CRYPTO_BYTES); | |||
memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES); | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(S[i]); | |||
} | |||
memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR); | |||
// Add H(pk) to the secret key | |||
shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { | |||
// FrodoKEM's key encapsulation | |||
const uint8_t *pk_seedA = &pk[0]; | |||
const uint8_t *pk_b = &pk[BYTES_SEED_A]; | |||
uint8_t *ct_c1 = &ct[0]; | |||
uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via mu | |||
uint8_t *pkh = &G2in[0]; | |||
uint8_t *mu = &G2in[BYTES_PKHASH]; // contains secret data | |||
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data | |||
uint8_t *seedSE = &G2out[0]; // contains secret data | |||
uint8_t *k = &G2out[CRYPTO_BYTES]; // contains secret data | |||
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k | |||
uint8_t *Fin_ct = &Fin[0]; | |||
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data | |||
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data | |||
// pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu) | |||
shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); | |||
randombytes(mu, BYTES_MU); | |||
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); | |||
// Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly | |||
shake_input_seedSE[0] = 0x96; | |||
memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES); | |||
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { | |||
Sp[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(Sp[i]); | |||
} | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Generate Epp, and compute V = Sp*B + Epp | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp); | |||
// Encode mu, and compute C = V + enc(mu) (mod q) | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(C, (uint16_t *)mu); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(C, V, C); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ); | |||
// Compute ss = F(ct || k) | |||
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); | |||
memcpy(Fin_k, k, CRYPTO_BYTES); | |||
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(mu, BYTES_MU); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { | |||
// FrodoKEM's key decapsulation | |||
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0}; | |||
uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0}; | |||
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data | |||
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
const uint8_t *ct_c1 = &ct[0]; | |||
const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; | |||
const uint8_t *sk_s = &sk[0]; | |||
const uint8_t *sk_pk = &sk[CRYPTO_BYTES]; | |||
const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; | |||
uint16_t S[PARAMS_N * PARAMS_NBAR]; // contains secret data | |||
const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; | |||
const uint8_t *pk_seedA = &sk_pk[0]; | |||
const uint8_t *pk_b = &sk_pk[BYTES_SEED_A]; | |||
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via muprime | |||
uint8_t *pkh = &G2in[0]; | |||
uint8_t *muprime = &G2in[BYTES_PKHASH]; // contains secret data | |||
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data | |||
uint8_t *seedSEprime = &G2out[0]; // contains secret data | |||
uint8_t *kprime = &G2out[CRYPTO_BYTES]; // contains secret data | |||
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k | |||
uint8_t *Fin_ct = &Fin[0]; | |||
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data | |||
uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES]; // contains secret data | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8); | |||
} | |||
// Compute W = C - Bp*S (mod q), and decode the randomness mu | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_bs(W, Bp, S); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sub(W, C, W); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_decode((uint16_t *)muprime, W); | |||
// Generate (seedSE' || k') = G_2(pkh || mu') | |||
memcpy(pkh, sk_pkh, BYTES_PKHASH); | |||
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); | |||
// Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly | |||
shake_input_seedSEprime[0] = 0x96; | |||
memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES); | |||
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES); | |||
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { | |||
Sp[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(Sp[i]); | |||
} | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA); | |||
// Generate Epp, and compute W = Sp*B + Epp | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp); | |||
// Encode mu', and compute CC = W + enc(mu') (mod q) | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(CC, (uint16_t *)muprime); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(CC, W, CC); | |||
// Prepare input to F | |||
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); | |||
// Reducing BBp modulo q | |||
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { | |||
BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
// If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s) | |||
// Needs to avoid branching on secret data as per: | |||
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum | |||
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020. | |||
int8_t selector = PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR); | |||
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s) | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector); | |||
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); | |||
// Cleanup: | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(muprime, BYTES_MU); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES); | |||
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES); | |||
return 0; | |||
} |
@@ -1,108 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: matrix arithmetic functions used by the KEM | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { | |||
// Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right. | |||
// Inputs: s, e (N x N_BAR) | |||
// Output: out = A*s + e (N x N_BAR) | |||
int j, k; | |||
uint16_t i; | |||
int16_t a_row[4 * PARAMS_N]; | |||
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { | |||
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); | |||
} | |||
uint8_t seed_A_separated[2 + BYTES_SEED_A]; | |||
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; | |||
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); | |||
for (i = 0; i < PARAMS_N; i += 4) { | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 0); | |||
shake128((unsigned char *)(a_row + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 1); | |||
shake128((unsigned char *)(a_row + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 2); | |||
shake128((unsigned char *)(a_row + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 3); | |||
shake128((unsigned char *)(a_row + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
for (k = 0; k < 4 * PARAMS_N; k++) { | |||
a_row[k] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(a_row[k]); | |||
} | |||
for (k = 0; k < PARAMS_NBAR; k++) { | |||
uint16_t sum[4] = {0}; | |||
for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication | |||
uint16_t sp = s[k * PARAMS_N + j]; | |||
sum[0] += a_row[0 * PARAMS_N + j] * sp; // Process four rows of A with the same s | |||
sum[1] += a_row[1 * PARAMS_N + j] * sp; | |||
sum[2] += a_row[2 * PARAMS_N + j] * sp; | |||
sum[3] += a_row[3 * PARAMS_N + j] * sp; | |||
} | |||
out[(i + 0)*PARAMS_NBAR + k] += sum[0]; | |||
out[(i + 2)*PARAMS_NBAR + k] += sum[2]; | |||
out[(i + 1)*PARAMS_NBAR + k] += sum[1]; | |||
out[(i + 3)*PARAMS_NBAR + k] += sum[3]; | |||
} | |||
} | |||
return 1; | |||
} | |||
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { | |||
// Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left. | |||
// Inputs: s', e' (N_BAR x N) | |||
// Output: out = s'*A + e' (N_BAR x N) | |||
int i, j; | |||
uint16_t kk; | |||
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { | |||
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); | |||
} | |||
int t = 0; | |||
uint16_t a_cols[4 * PARAMS_N]; | |||
int k; | |||
uint8_t seed_A_separated[2 + BYTES_SEED_A]; | |||
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; | |||
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); | |||
for (kk = 0; kk < PARAMS_N; kk += 4) { | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 0); | |||
shake128((unsigned char *)(a_cols + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 1); | |||
shake128((unsigned char *)(a_cols + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 2); | |||
shake128((unsigned char *)(a_cols + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 3); | |||
shake128((unsigned char *)(a_cols + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); | |||
for (i = 0; i < 4 * PARAMS_N; i++) { | |||
a_cols[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(a_cols[i]); | |||
} | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
uint16_t sum[PARAMS_N] = {0}; | |||
for (j = 0; j < 4; j++) { | |||
uint16_t sp = s[i * PARAMS_N + kk + j]; | |||
for (k = 0; k < PARAMS_N; k++) { // Matrix-vector multiplication | |||
sum[k] += (uint16_t)(sp * (uint32_t)a_cols[(t + j) * PARAMS_N + k]); | |||
} | |||
} | |||
for (k = 0; k < PARAMS_N; k++) { | |||
out[i * PARAMS_N + k] += sum[k]; | |||
} | |||
} | |||
} | |||
return 1; | |||
} |
@@ -1,35 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: noise sampling functions | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA; | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(uint16_t *s, size_t n) { | |||
// Fills vector s with n samples from the noise distribution which requires 16 bits to sample. | |||
// The distribution is specified by its CDF. | |||
// Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output. | |||
size_t i; | |||
unsigned int j; | |||
for (i = 0; i < n; ++i) { | |||
uint16_t sample = 0; | |||
uint16_t prnd = s[i] >> 1; // Drop the least significant bit | |||
uint16_t sign = s[i] & 0x1; // Pick the least significant bit | |||
// No need to compare with the last value. | |||
for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) { | |||
// Constant time comparison: 1 if CDF_TABLE[j] < prnd, 0 otherwise. Uses the fact that CDF_TABLE[j] and prnd fit in 15 bits. | |||
sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15; | |||
} | |||
// Assuming that sign is either 0 or 1, negates sample iff sign = 1 | |||
s[i] = ((-sign) ^ sample) + sign; | |||
} | |||
} |
@@ -1,27 +0,0 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES | |||
#define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES | |||
#define CRYPTO_BYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_BYTES | |||
#define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES | |||
#define PARAMS_N 976 | |||
#define PARAMS_NBAR 8 | |||
#define PARAMS_LOGQ 16 | |||
#define PARAMS_Q (1 << PARAMS_LOGQ) | |||
#define PARAMS_EXTRACTED_BITS 3 | |||
#define PARAMS_STRIPE_STEP 8 | |||
#define PARAMS_PARALLEL 4 | |||
#define BYTES_SEED_A 16 | |||
#define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8) | |||
#define BYTES_PKHASH CRYPTO_BYTES | |||
// Selecting SHAKE XOF function for the KEM and noise sampling | |||
#define shake shake256 | |||
// CDF table | |||
#define CDF_TABLE_DATA {5638, 15915, 23689, 28571, 31116, 32217, 32613, 32731, 32760, 32766, 32767} | |||
#define CDF_TABLE_LEN 11 | |||
#endif |
@@ -1,264 +0,0 @@ | |||
/******************************************************************************************** | |||
* FrodoKEM: Learning with Errors Key Encapsulation | |||
* | |||
* Abstract: additional functions for FrodoKEM | |||
*********************************************************************************************/ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "api.h" | |||
#include "common.h" | |||
#include "params.h" | |||
static inline uint8_t min(uint8_t x, uint8_t y) { | |||
if (x < y) { | |||
return x; | |||
} | |||
return y; | |||
} | |||
uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(uint16_t n) { | |||
return (((uint8_t *) &n)[0] | (((uint8_t *) &n)[1] << 8)); | |||
} | |||
uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(uint16_t n) { | |||
uint16_t y; | |||
uint8_t *z = (uint8_t *) &y; | |||
z[0] = n & 0xFF; | |||
z[1] = (n & 0xFF00) >> 8; | |||
return y; | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) { | |||
// Multiply by s on the right | |||
// Inputs: b (N_BAR x N), s (N x N_BAR) | |||
// Output: out = b*s (N_BAR x N_BAR) | |||
int i, j, k; | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
for (j = 0; j < PARAMS_NBAR; j++) { | |||
out[i * PARAMS_NBAR + j] = 0; | |||
for (k = 0; k < PARAMS_N; k++) { | |||
out[i * PARAMS_NBAR + j] += (uint16_t)(b[i * PARAMS_N + k] * (uint32_t)s[j * PARAMS_N + k]); | |||
} | |||
out[i * PARAMS_NBAR + j] = (uint32_t)(out[i * PARAMS_NBAR + j]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) { | |||
// Multiply by s on the left | |||
// Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR) | |||
// Output: out = s*b + e (N_BAR x N_BAR) | |||
int i, j, k; | |||
for (k = 0; k < PARAMS_NBAR; k++) { | |||
for (i = 0; i < PARAMS_NBAR; i++) { | |||
out[k * PARAMS_NBAR + i] = e[k * PARAMS_NBAR + i]; | |||
for (j = 0; j < PARAMS_N; j++) { | |||
out[k * PARAMS_NBAR + i] += (uint16_t)(s[k * PARAMS_N + j] * (uint32_t)b[j * PARAMS_NBAR + i]); | |||
} | |||
out[k * PARAMS_NBAR + i] = (uint32_t)(out[k * PARAMS_NBAR + i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) { | |||
// Add a and b | |||
// Inputs: a, b (N_BAR x N_BAR) | |||
// Output: c = a + b | |||
for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) { | |||
out[i] = (a[i] + b[i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) { | |||
// Subtract a and b | |||
// Inputs: a, b (N_BAR x N_BAR) | |||
// Output: c = a - b | |||
for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) { | |||
out[i] = (a[i] - b[i]) & ((1 << PARAMS_LOGQ) - 1); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in) { | |||
// Encoding | |||
unsigned int i, j, npieces_word = 8; | |||
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8; | |||
uint64_t temp, mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1; | |||
uint16_t *pos = out; | |||
for (i = 0; i < nwords; i++) { | |||
temp = 0; | |||
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) { | |||
temp |= ((uint64_t)((uint8_t *)in)[i * PARAMS_EXTRACTED_BITS + j]) << (8 * j); | |||
} | |||
for (j = 0; j < npieces_word; j++) { | |||
*pos = (uint16_t)((temp & mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)); | |||
temp >>= PARAMS_EXTRACTED_BITS; | |||
pos++; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in) { | |||
// Decoding | |||
unsigned int i, j, index = 0, npieces_word = 8; | |||
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8; | |||
uint16_t temp, maskex = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1, maskq = ((uint16_t)1 << PARAMS_LOGQ) - 1; | |||
uint8_t *pos = (uint8_t *)out; | |||
uint64_t templong; | |||
for (i = 0; i < nwords; i++) { | |||
templong = 0; | |||
for (j = 0; j < npieces_word; j++) { // temp = floor(in * 2^{-(PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)} + 0.5) | |||
temp = ((in[index] & maskq) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS); | |||
templong |= ((uint64_t)(temp & maskex)) << (PARAMS_EXTRACTED_BITS * j); | |||
index++; | |||
} | |||
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) { | |||
pos[i * PARAMS_EXTRACTED_BITS + j] = (templong >> (8 * j)) & 0xFF; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) { | |||
// Pack the input uint16 vector into a char output vector, copying lsb bits from each input element. | |||
// If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied. | |||
memset(out, 0, outlen); | |||
size_t i = 0; // whole bytes already filled in | |||
size_t j = 0; // whole uint16_t already copied | |||
uint16_t w = 0; // the leftover, not yet copied | |||
uint8_t bits = 0; // the number of lsb in w | |||
while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { | |||
/* | |||
in: | | |********|********| | |||
^ | |||
j | |||
w : | ****| | |||
^ | |||
bits | |||
out:|**|**|**|**|**|**|**|**|* | | |||
^^ | |||
ib | |||
*/ | |||
uint8_t b = 0; // bits in out[i] already filled in | |||
while (b < 8) { | |||
int nbits = min(8 - b, bits); | |||
uint16_t mask = (1 << nbits) - 1; | |||
uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask); // the bits to copy from w to out | |||
out[i] = out[i] + (t << (8 - b - nbits)); | |||
b += (uint8_t) nbits; | |||
bits -= (uint8_t) nbits; | |||
w &= ~(mask << bits); // not strictly necessary; mostly for debugging | |||
if (bits == 0) { | |||
if (j < inlen) { | |||
w = in[j]; | |||
bits = lsb; | |||
j++; | |||
} else { | |||
break; // the input vector is exhausted | |||
} | |||
} | |||
} | |||
if (b == 8) { // out[i] is filled in | |||
i++; | |||
} | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) { | |||
// Unpack the input char vector into a uint16_t output vector, copying lsb bits | |||
// for each output element from input. outlen must be at least ceil(inlen * 8 / lsb). | |||
memset(out, 0, outlen * sizeof(uint16_t)); | |||
size_t i = 0; // whole uint16_t already filled in | |||
size_t j = 0; // whole bytes already copied | |||
uint8_t w = 0; // the leftover, not yet copied | |||
uint8_t bits = 0; // the number of lsb bits of w | |||
while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) { | |||
/* | |||
in: | | | | | | |**|**|... | |||
^ | |||
j | |||
w : | *| | |||
^ | |||
bits | |||
out:| *****| *****| *** | |... | |||
^ ^ | |||
i b | |||
*/ | |||
uint8_t b = 0; // bits in out[i] already filled in | |||
while (b < lsb) { | |||
int nbits = min(lsb - b, bits); | |||
uint16_t mask = (1 << nbits) - 1; | |||
uint8_t t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out | |||
out[i] = out[i] + (t << (lsb - b - nbits)); | |||
b += (uint8_t) nbits; | |||
bits -= (uint8_t) nbits; | |||
w &= ~(mask << bits); // not strictly necessary; mostly for debugging | |||
if (bits == 0) { | |||
if (j < inlen) { | |||
w = in[j]; | |||
bits = 8; | |||
j++; | |||
} else { | |||
break; // the input vector is exhausted | |||
} | |||
} | |||
} | |||
if (b == lsb) { // out[i] is filled in | |||
i++; | |||
} | |||
} | |||
} | |||
int8_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) { | |||
// Compare two arrays in constant time. | |||
// Returns 0 if the input arrays are equal, -1 otherwise. | |||
uint16_t r = 0; | |||
for (size_t i = 0; i < len; i++) { | |||
r |= a[i] ^ b[i]; | |||
} | |||
r = (-(int16_t)(r >> 1) | -(int16_t)(r & 1)) >> (8 * sizeof(uint16_t) - 1); | |||
return (int8_t)r; | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) { | |||
// Select one of the two input arrays to be moved to r | |||
// If (selector == 0) then load r with a, else if (selector == -1) load r with b | |||
for (size_t i = 0; i < len; i++) { | |||
r[i] = (~selector & a[i]) | (selector & b[i]); | |||
} | |||
} | |||
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n) { | |||
// Clear 8-bit bytes from memory. "n" indicates the number of bytes to be zeroed. | |||
// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. | |||
volatile uint8_t *v = mem; | |||
for (size_t i = 0; i < n; i++) { | |||
v[i] = 0; | |||
} | |||
} |
@@ -1,32 +0,0 @@ | |||
set( | |||
SRC_AVX2_NTRUHPS2048509 | |||
cmov.c | |||
crypto_sort_int32.c | |||
kem.c | |||
owcpa.c | |||
pack3.c | |||
packq.c | |||
poly.c | |||
poly_lift.c | |||
poly_mod_3_Phi_n.s | |||
poly_mod_q_Phi_n.s | |||
poly_r2_inv.c | |||
poly_r2_mul.s | |||
poly_rq_mul.s | |||
poly_rq_to_s3.s | |||
poly_s3_inv.c | |||
sample.c | |||
sample_iid.c | |||
square_126_509_shufbytes.s | |||
square_1_509_patience.s | |||
square_15_509_shufbytes.s | |||
square_252_509_shufbytes.s | |||
square_30_509_shufbytes.s | |||
square_3_509_patience.s | |||
square_63_509_shufbytes.s | |||
square_6_509_patience.s | |||
vec32_sample_iid.s | |||
) | |||
define_kem_alg(ntruhps2048509_avx2 | |||
PQCLEAN_NTRUHPS2048509_AVX2 "${SRC_AVX2_NTRUHPS2048509}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -1,19 +0,0 @@ | |||
#ifndef PQCLEAN_NTRUHPS2048509_AVX2_API_H | |||
#define PQCLEAN_NTRUHPS2048509_AVX2_API_H | |||
#include <stdint.h> | |||
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_SECRETKEYBYTES 935 | |||
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_PUBLICKEYBYTES 699 | |||
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_CIPHERTEXTBYTES 699 | |||
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_BYTES 32 | |||
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_ALGNAME "ntruhps2048509" | |||
int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk); | |||
int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#include "cmov.h" | |||
/* b = 1 means move, b = 0 means don't move */ | |||
void PQCLEAN_NTRUHPS2048509_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) { | |||
size_t i; | |||
b = (~b + 1); | |||
for (i = 0; i < len; i++) { | |||
r[i] ^= b & (x[i] ^ r[i]); | |||
} | |||
} |
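`cmov` turns the flag `b` into a full byte mask with `~b + 1` (1 becomes 0xFF, 0 stays 0x00) and then conditionally overwrites `r` with `x` using only XOR/AND, so no secret-dependent branch is taken. Below is a standalone copy of the routine with a small demonstration; `toy_cmov` is just an illustrative name for this sketch.

```c
#include <stddef.h>
#include <stdio.h>

/* Standalone copy of the routine above, under an illustrative name. */
static void toy_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) {
    b = (unsigned char)(~b + 1);   /* 1 -> 0xFF, 0 -> 0x00 */
    for (size_t i = 0; i < len; i++) {
        r[i] ^= (unsigned char)(b & (x[i] ^ r[i]));  /* r = b ? x : r, without a branch */
    }
}

int main(void) {
    unsigned char key[4]  = {0x11, 0x22, 0x33, 0x44};
    unsigned char fake[4] = {0xDE, 0xAD, 0xBE, 0xEF};

    toy_cmov(key, fake, sizeof key, 0);  /* b = 0: key is left unchanged */
    printf("b=0: %02x %02x %02x %02x\n", key[0], key[1], key[2], key[3]);

    toy_cmov(key, fake, sizeof key, 1);  /* b = 1: key is replaced by fake */
    printf("b=1: %02x %02x %02x %02x\n", key[0], key[1], key[2], key[3]);
    return 0;
}
```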
@@ -1,10 +0,0 @@ | |||
#ifndef VERIFY_H | |||
#define VERIFY_H | |||
#include "params.h" | |||
#include <stddef.h> | |||
void PQCLEAN_NTRUHPS2048509_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#ifndef CRYPTO_SORT | |||
#define CRYPTO_SORT | |||
#include "params.h" | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
void PQCLEAN_NTRUHPS2048509_AVX2_crypto_sort_int32(int32_t *x, size_t n); | |||
#endif |
@@ -1,63 +0,0 @@ | |||
#include "api.h" | |||
#include "cmov.h" | |||
#include "fips202.h" | |||
#include "owcpa.h" | |||
#include "params.h" | |||
#include "randombytes.h" | |||
#include "sample.h" | |||
// API FUNCTIONS | |||
int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
uint8_t seed[NTRU_SAMPLE_FG_BYTES]; | |||
randombytes(seed, NTRU_SAMPLE_FG_BYTES); | |||
PQCLEAN_NTRUHPS2048509_AVX2_owcpa_keypair(pk, sk, seed); | |||
randombytes(sk + NTRU_OWCPA_SECRETKEYBYTES, NTRU_PRFKEYBYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) { | |||
poly r, m; | |||
uint8_t rm[NTRU_OWCPA_MSGBYTES]; | |||
uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES]; | |||
randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES); | |||
PQCLEAN_NTRUHPS2048509_AVX2_sample_rm(&r, &m, rm_seed); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm, &r); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m); | |||
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(&r); | |||
PQCLEAN_NTRUHPS2048509_AVX2_owcpa_enc(c, &r, &m, pk); | |||
return 0; | |||
} | |||
int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk) { | |||
int i, fail; | |||
uint8_t rm[NTRU_OWCPA_MSGBYTES]; | |||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES]; | |||
fail = PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(rm, c, sk); | |||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */ | |||
/* See comment in PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec for details. */ | |||
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); | |||
/* shake(secret PRF key || input ciphertext) */ | |||
for (i = 0; i < NTRU_PRFKEYBYTES; i++) { | |||
buf[i] = sk[i + NTRU_OWCPA_SECRETKEYBYTES]; | |||
} | |||
for (i = 0; i < NTRU_CIPHERTEXTBYTES; i++) { | |||
buf[NTRU_PRFKEYBYTES + i] = c[i]; | |||
} | |||
sha3_256(rm, buf, NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES); | |||
PQCLEAN_NTRUHPS2048509_AVX2_cmov(k, rm, NTRU_SHAREDKEYBYTES, (unsigned char) fail); | |||
return 0; | |||
} |
@@ -1,183 +0,0 @@ | |||
#include "owcpa.h" | |||
#include "poly.h" | |||
#include "sample.h" | |||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) { | |||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */ | |||
/* Check that any unused bits of the final byte are zero. */ | |||
uint16_t t = 0; | |||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1]; | |||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))); | |||
/* We have 0 <= t < 256 */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 15)); | |||
} | |||
static int owcpa_check_r(const poly *r) { | |||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */ | |||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */ | |||
int i; | |||
uint32_t t = 0; | |||
uint16_t c; | |||
for (i = 0; i < NTRU_N - 1; i++) { | |||
c = r->coeffs[i]; | |||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */ | |||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */ | |||
} | |||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */ | |||
/* We have 0 <= t < 2^16. */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 31)); | |||
} | |||
static int owcpa_check_m(const poly *m) { | |||
/* Check that m is in message space, i.e. */ | |||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */ | |||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */ | |||
/* Note: We may assume that m has coefficients in {0,1,2}. */ | |||
int i; | |||
uint32_t t = 0; | |||
uint16_t ps = 0; | |||
uint16_t ms = 0; | |||
for (i = 0; i < NTRU_N; i++) { | |||
ps += m->coeffs[i] & 1; | |||
ms += m->coeffs[i] & 2; | |||
} | |||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */ | |||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */ | |||
/* We have 0 <= t < 2^16. */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 31)); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_keypair(unsigned char *pk, | |||
unsigned char *sk, | |||
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) { | |||
int i; | |||
poly x1, x2, x3, x4, x5; | |||
poly *f = &x1, *g = &x2, *invf_mod3 = &x3; | |||
poly *gf = &x3, *invgf = &x4, *tmp = &x5; | |||
poly *invh = &x3, *h = &x3; | |||
PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(f, g, seed); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_inv(invf_mod3, f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(sk, f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(sk + NTRU_PACK_TRINARY_BYTES, invf_mod3); | |||
/* Lift coeffs of f and g from Z_p to Z_q */ | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(g); | |||
/* g = 3*g */ | |||
for (i = 0; i < NTRU_N; i++) { | |||
g->coeffs[i] = 3 * g->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(gf, g, f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_inv(invgf, gf); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(tmp, invgf, f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(invh, tmp, f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(tmp, invgf, g); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(h, tmp, g); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(pk, h); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_enc(unsigned char *c, | |||
const poly *r, | |||
const poly *m, | |||
const unsigned char *pk) { | |||
int i; | |||
poly x1, x2; | |||
poly *h = &x1, *liftm = &x1; | |||
poly *ct = &x2; | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(h, pk); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(ct, r, h); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(liftm, m); | |||
for (i = 0; i < NTRU_N; i++) { | |||
ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(c, ct); | |||
} | |||
int PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(unsigned char *rm, | |||
const unsigned char *ciphertext, | |||
const unsigned char *secretkey) { | |||
int i; | |||
int fail; | |||
poly x1, x2, x3, x4; | |||
poly *c = &x1, *f = &x2, *cf = &x3; | |||
poly *mf = &x2, *finv3 = &x3, *m = &x4; | |||
poly *liftm = &x2, *invh = &x3, *r = &x4; | |||
poly *b = &x1; | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(c, ciphertext); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(f, secretkey); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(cf, c, f); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3(mf, cf); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(finv3, secretkey + NTRU_PACK_TRINARY_BYTES); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_mul(m, mf, finv3); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m); | |||
fail = 0; | |||
/* Check that the unused bits of the last byte of the ciphertext are zero */ | |||
fail |= owcpa_check_ciphertext(ciphertext); | |||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */ | |||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */ | |||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */ | |||
/* (m can take any value in S3 in NTRU_HRSS) */ | |||
fail |= owcpa_check_m(m); | |||
/* b = c - Lift(m) mod (q, x^n - 1) */ | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(liftm, m); | |||
for (i = 0; i < NTRU_N; i++) { | |||
b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i]; | |||
} | |||
/* r = b / h mod (q, Phi_n) */ | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(invh, secretkey + 2 * NTRU_PACK_TRINARY_BYTES); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(r, b, invh); | |||
/* NOTE: Our definition of r as b/h mod (q, Phi_n) follows Figure 4 of */ | |||
/* [Sch18] https://eprint.iacr.org/2018/1174/20181203:032458. */ | |||
/* This differs from Figure 10 of Saito--Xagawa--Yamakawa */ | |||
/* [SXY17] https://eprint.iacr.org/2017/1005/20180516:055500 */ | |||
/* where r gets a final reduction modulo p. */ | |||
/* We need this change to use Proposition 1 of [Sch18]. */ | |||
/* Proposition 1 of [Sch18] shows that re-encryption with (r,m) yields c. */ | |||
/* if and only if fail==0 after the following call to owcpa_check_r */ | |||
/* The procedure given in Fig. 8 of [Sch18] can be skipped because we have */ | |||
/* c(1) = 0 due to the use of poly_Rq_sum_zero_{to,from}bytes. */ | |||
fail |= owcpa_check_r(r); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_trinary_Zq_to_Z3(r); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm, r); | |||
return fail; | |||
} |
@@ -1,19 +0,0 @@ | |||
#ifndef OWCPA_H | |||
#define OWCPA_H | |||
#include "params.h" | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_keypair(unsigned char *pk, | |||
unsigned char *sk, | |||
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_enc(unsigned char *c, | |||
const poly *r, | |||
const poly *m, | |||
const unsigned char *pk); | |||
int PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(unsigned char *rm, | |||
const unsigned char *ciphertext, | |||
const unsigned char *secretkey); | |||
#endif |
@@ -1,46 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_MSGBYTES], const poly *a) { | |||
int i; | |||
unsigned char c; | |||
int j; | |||
for (i = 0; i < NTRU_PACK_DEG / 5; i++) { | |||
c = a->coeffs[5 * i + 4] & 255; | |||
c = (3 * c + a->coeffs[5 * i + 3]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 2]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 1]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 0]) & 255; | |||
msg[i] = c; | |||
} | |||
i = NTRU_PACK_DEG / 5; | |||
c = 0; | |||
for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) { | |||
c = (3 * c + a->coeffs[5 * i + j]) & 255; | |||
} | |||
msg[i] = c; | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_OWCPA_MSGBYTES]) { | |||
int i; | |||
unsigned char c; | |||
int j; | |||
for (i = 0; i < NTRU_PACK_DEG / 5; i++) { | |||
c = msg[i]; | |||
r->coeffs[5 * i + 0] = c; | |||
r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3 | |||
r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2 | |||
r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3 | |||
r->coeffs[5 * i + 4] = c * 203 >> 14; // etc. | |||
} | |||
i = NTRU_PACK_DEG / 5; | |||
c = msg[i]; | |||
for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) { | |||
r->coeffs[5 * i + j] = c; | |||
c = c * 171 >> 9; | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n(r); | |||
} | |||
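The unpacking above divides by powers of 3 with multiply-and-shift: 171/2^9, 57/2^9, 19/2^9 and 203/2^14 are close enough to 1/3, 1/9, 1/27 and 1/81 to be exact for one packed byte (five trits, values 0..242); the residues mod 3 are then fixed up by the poly_mod_3_Phi_n call at the end. A round-trip sketch for a single byte (standalone test, not library code):

```c
#include <assert.h>
#include <stdint.h>

int main(void) {
    for (uint32_t t0 = 0; t0 < 3; t0++)
    for (uint32_t t1 = 0; t1 < 3; t1++)
    for (uint32_t t2 = 0; t2 < 3; t2++)
    for (uint32_t t3 = 0; t3 < 3; t3++)
    for (uint32_t t4 = 0; t4 < 3; t4++) {
        /* poly_S3_tobytes stores c = t0 + 3*t1 + 9*t2 + 27*t3 + 81*t4 <= 242 */
        uint32_t c = t0 + 3 * t1 + 9 * t2 + 27 * t3 + 81 * t4;
        /* poly_S3_frombytes recovers the digits with the same shifts;
         * the "% 3" stands in for the later mod-3 reduction */
        assert((c % 3) == t0);
        assert(((c * 171 >> 9) % 3) == t1);    /* c*171>>9  == c/3  */
        assert(((c * 57 >> 9) % 3) == t2);     /* c*57 >>9  == c/9  */
        assert(((c * 19 >> 9) % 3) == t3);     /* c*19 >>9  == c/27 */
        assert(((c * 203 >> 14) % 3) == t4);   /* c*203>>14 == c/81 */
    }
    return 0;
}
```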
@@ -1,93 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a) { | |||
int i, j; | |||
uint16_t t[8]; | |||
for (i = 0; i < NTRU_PACK_DEG / 8; i++) { | |||
for (j = 0; j < 8; j++) { | |||
t[j] = MODQ(a->coeffs[8 * i + j]); | |||
} | |||
r[11 * i + 0] = (unsigned char) ( t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3)); | |||
r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6)); | |||
r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff); | |||
r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1)); | |||
r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4)); | |||
r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7)); | |||
r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff); | |||
r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2)); | |||
r[11 * i + 9] = (unsigned char) ((t[6] >> 6) | ((t[7] & 0x07) << 5)); | |||
r[11 * i + 10] = (unsigned char) ((t[7] >> 3)); | |||
} | |||
for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) { | |||
t[j] = MODQ(a->coeffs[8 * i + j]); | |||
} | |||
for (; j < 8; j++) { | |||
t[j] = 0; | |||
} | |||
switch (NTRU_PACK_DEG & 0x07) { | |||
// cases 0 and 6 are impossible since 2 generates (Z/n)* and | |||
// p mod 8 in {1, 7} implies that 2 is a quadratic residue. | |||
case 4: | |||
r[11 * i + 0] = (unsigned char) (t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); | |||
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); | |||
r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff; | |||
r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1); | |||
r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4); | |||
break; | |||
case 2: | |||
r[11 * i + 0] = (unsigned char) (t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); | |||
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); | |||
break; | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a) { | |||
int i; | |||
for (i = 0; i < NTRU_PACK_DEG / 8; i++) { | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); | |||
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); | |||
r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4); | |||
r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9); | |||
r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6); | |||
r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3); | |||
} | |||
switch (NTRU_PACK_DEG & 0x07) { | |||
// cases 0 and 6 are impossible since 2 generates (Z/n)* and | |||
// p mod 8 in {1, 7} implies that 2 is a quadratic residue. | |||
case 4: | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); | |||
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); | |||
break; | |||
case 2: | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
break; | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) { | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(r, a); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a) { | |||
int i; | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(r, a); | |||
/* Set r[n-1] so that the sum of coefficients is zero mod q */ | |||
r->coeffs[NTRU_N - 1] = 0; | |||
for (i = 0; i < NTRU_PACK_DEG; i++) { | |||
r->coeffs[NTRU_N - 1] -= r->coeffs[i]; | |||
} | |||
} |
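The routines above pack eight 11-bit coefficients into 11 bytes, low bits first, with the partial final block handled by the switch. A hypothetical accumulator-based sketch of the same layout (`pack8x11`/`unpack8x11` are illustrative names, useful mainly for cross-checking the hand-unrolled shifts):

```c
#include <assert.h>
#include <stdint.h>

static void pack8x11(uint8_t out[11], const uint16_t in[8]) {
    uint32_t acc = 0;
    int bits = 0, o = 0;
    for (int i = 0; i < 8; i++) {
        acc |= (uint32_t)(in[i] & 0x7ff) << bits;   /* append 11 bits */
        bits += 11;
        while (bits >= 8) {                         /* flush whole bytes */
            out[o++] = (uint8_t)acc;
            acc >>= 8;
            bits -= 8;
        }
    }
}

static void unpack8x11(uint16_t out[8], const uint8_t in[11]) {
    uint32_t acc = 0;
    int bits = 0, idx = 0;
    for (int i = 0; i < 8; i++) {
        while (bits < 11) {                         /* refill */
            acc |= (uint32_t)in[idx++] << bits;
            bits += 8;
        }
        out[i] = (uint16_t)(acc & 0x7ff);
        acc >>= 11;
        bits -= 11;
    }
}

int main(void) {
    uint16_t c[8] = {0, 1, 2047, 1024, 509, 3, 1023, 2046}, d[8];
    uint8_t buf[11];
    pack8x11(buf, c);
    unpack8x11(d, buf);
    for (int i = 0; i < 8; i++) {
        assert(c[i] == d[i]);
    }
    return 0;
}
```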
@@ -1,37 +0,0 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#define NTRU_HPS | |||
#define NTRU_N 509 | |||
#define NTRU_LOGQ 11 | |||
/* Do not modify below this line */ | |||
#define PAD32(X) ((((X) + 31)/32)*32) | |||
#define NTRU_Q (1 << NTRU_LOGQ) | |||
#define NTRU_WEIGHT (NTRU_Q/8 - 2) | |||
#define NTRU_SEEDBYTES 32 | |||
#define NTRU_PRFKEYBYTES 32 | |||
#define NTRU_SHAREDKEYBYTES 32 | |||
#define NTRU_SAMPLE_IID_BYTES (NTRU_N-1) | |||
#define NTRU_SAMPLE_FT_BYTES ((30*(NTRU_N-1)+7)/8) | |||
#define NTRU_SAMPLE_FG_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES) | |||
#define NTRU_SAMPLE_RM_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES) | |||
#define NTRU_PACK_DEG (NTRU_N-1) | |||
#define NTRU_PACK_TRINARY_BYTES ((NTRU_PACK_DEG+4)/5) | |||
#define NTRU_OWCPA_MSGBYTES (2*NTRU_PACK_TRINARY_BYTES) | |||
#define NTRU_OWCPA_PUBLICKEYBYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8) | |||
#define NTRU_OWCPA_SECRETKEYBYTES (2*NTRU_PACK_TRINARY_BYTES + NTRU_OWCPA_PUBLICKEYBYTES) | |||
#define NTRU_OWCPA_BYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8) | |||
#define NTRU_PUBLICKEYBYTES (NTRU_OWCPA_PUBLICKEYBYTES) | |||
#define NTRU_SECRETKEYBYTES (NTRU_OWCPA_SECRETKEYBYTES + NTRU_PRFKEYBYTES) | |||
#define NTRU_CIPHERTEXTBYTES (NTRU_OWCPA_BYTES) | |||
#endif |
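For ntruhps2048509 these macros work out to q = 2048, sampling weight 254, a 699-byte public key and ciphertext, and a 935-byte secret key. A compile-time sketch of that arithmetic, mirroring the macros rather than including the deleted header:

```c
#include <assert.h>

/* Mirrors the macro arithmetic from params.h for NTRU-HPS2048509. */
enum {
    ntru_n = 509,
    ntru_logq = 11,
    ntru_q = 1 << ntru_logq,
    ntru_weight = ntru_q / 8 - 2,
    ntru_pack_deg = ntru_n - 1,
    ntru_pack_trinary_bytes = (ntru_pack_deg + 4) / 5,
    ntru_owcpa_publickeybytes = (ntru_logq * ntru_pack_deg + 7) / 8,
    ntru_owcpa_secretkeybytes = 2 * ntru_pack_trinary_bytes + ntru_owcpa_publickeybytes,
    ntru_prfkeybytes = 32,
};

static_assert(ntru_q == 2048, "q");
static_assert(ntru_weight == 254, "fixed weight of r and m");
static_assert(ntru_pack_trinary_bytes == 102, "packed S3 polynomial");
static_assert(ntru_owcpa_publickeybytes == 699, "public key / ciphertext bytes");
static_assert(ntru_owcpa_secretkeybytes + ntru_prfkeybytes == 935, "secret key bytes");

int main(void) {
    return 0;
}
```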
@@ -1,75 +0,0 @@ | |||
#include "poly.h" | |||
/* Map {0, 1, 2} -> {0,1,q-1} in place */ | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = r->coeffs[i] | ((-(r->coeffs[i] >> 1)) & (NTRU_Q - 1)); | |||
} | |||
} | |||
/* Map {0, 1, q-1} -> {0,1,2} in place */ | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_trinary_Zq_to_Z3(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = MODQ(r->coeffs[i]); | |||
r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1))); | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b) { | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, a, b); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n(r); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b) { | |||
int i; | |||
/* Our S3 multiplications do not overflow mod q, */ | |||
/* so we can re-purpose PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul, as long as we */ | |||
/* follow with an explicit reduction mod q. */ | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, a, b); | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = MODQ(r->coeffs[i]); | |||
} | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n(r); | |||
} | |||
static void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { | |||
int i; | |||
poly b, c; | |||
poly s; | |||
// for 0..4 | |||
// ai = ai * (2 - a*ai) mod q | |||
for (i = 0; i < NTRU_N; i++) { | |||
b.coeffs[i] = -(a->coeffs[i]); | |||
} | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = ai->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, r, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*ai | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&s, &c, r); // s = ai*c | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, &s, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*s | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, r, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*r | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&s, &c, r); // s = r*c | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, &s, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*s | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_inv(poly *r, const poly *a) { | |||
poly ai2; | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv(&ai2, a); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv_to_Rq_inv(r, &ai2, a); | |||
} |
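PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv_to_Rq_inv is Newton (Hensel) lifting: if ai is an inverse of a modulo 2^m, then ai*(2 - a*ai) is an inverse modulo 2^(2m), so the four unrolled steps lift a mod-2 inverse to an inverse mod q = 2^11. The same recurrence over plain integers, as a minimal sketch of why four steps are enough:

```c
#include <assert.h>
#include <stdint.h>

/* Newton/Hensel iteration for an inverse modulo a power of 2: if
 * a*x == 1 (mod 2^m) then a*(x*(2 - a*x)) == 1 (mod 2^(2m)).
 * Starting from an inverse mod 2 (x = 1 for odd a), four steps
 * reach mod 2^16 >= 2^11 = q. */
int main(void) {
    for (uint32_t a = 1; a < 0x10000; a += 2) {   /* every odd a */
        uint16_t x = 1;                           /* a*x == 1 (mod 2) */
        for (int i = 0; i < 4; i++) {
            x = (uint16_t)(x * (2 - a * x));      /* arithmetic mod 2^16 */
        }
        assert((uint16_t)(a * x) == 1);           /* inverse mod 2^16 */
        assert(((a * x) & 0x7ff) == 1);           /* hence also mod q = 2^11 */
    }
    return 0;
}
```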
@@ -1,41 +0,0 @@ | |||
#ifndef POLY_H | |||
#define POLY_H | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "params.h" | |||
#define MODQ(X) ((X) & (NTRU_Q-1)) | |||
typedef union { /* align to 32 byte boundary for vmovdqa */ | |||
uint16_t coeffs[PAD32(NTRU_N)]; | |||
__m256i coeffs_x16[PAD32(NTRU_N) / 16]; | |||
} poly; | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n(poly *r); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n(poly *r); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_PACK_TRINARY_BYTES], const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_PACK_TRINARY_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(poly *r); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_trinary_Zq_to_Z3(poly *r); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(poly *r, const poly *a) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = a->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(r); | |||
} | |||
@@ -1,676 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
mask_ff: | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
mask_f: | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
mask_3: | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n: | |||
vmovdqa 992(%rdi), %ymm0 | |||
vpermq $3, %ymm0, %ymm0 | |||
vpslld $17, %ymm0, %ymm0 | |||
vpsrld $16, %ymm0, %ymm1 | |||
vpor %ymm0, %ymm1, %ymm0 | |||
vbroadcastss %xmm0, %ymm0 | |||
vpaddw 0(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 0(%rdi) | |||
vpaddw 32(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 32(%rdi) | |||
vpaddw 64(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 64(%rdi) | |||
vpaddw 96(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 96(%rdi) | |||
vpaddw 128(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 128(%rdi) | |||
vpaddw 160(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 160(%rdi) | |||
vpaddw 192(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 192(%rdi) | |||
vpaddw 224(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 224(%rdi) | |||
vpaddw 256(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 256(%rdi) | |||
vpaddw 288(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 288(%rdi) | |||
vpaddw 320(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 320(%rdi) | |||
vpaddw 352(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 352(%rdi) | |||
vpaddw 384(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 384(%rdi) | |||
vpaddw 416(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 416(%rdi) | |||
vpaddw 448(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 448(%rdi) | |||
vpaddw 480(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 480(%rdi) | |||
vpaddw 512(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 512(%rdi) | |||
vpaddw 544(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 544(%rdi) | |||
vpaddw 576(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 576(%rdi) | |||
vpaddw 608(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 608(%rdi) | |||
vpaddw 640(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 640(%rdi) | |||
vpaddw 672(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 672(%rdi) | |||
vpaddw 704(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 704(%rdi) | |||
vpaddw 736(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 736(%rdi) | |||
vpaddw 768(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 768(%rdi) | |||
vpaddw 800(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 800(%rdi) | |||
vpaddw 832(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 832(%rdi) | |||
vpaddw 864(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 864(%rdi) | |||
vpaddw 896(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 896(%rdi) | |||
vpaddw 928(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 928(%rdi) | |||
vpaddw 960(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 960(%rdi) | |||
vpaddw 992(%rdi), %ymm0, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 992(%rdi) | |||
movw $0, 1018(%rdi) | |||
movw $0, 1020(%rdi) | |||
movw $0, 1022(%rdi) | |||
ret |
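This routine is the vectorized form of reduction modulo 3 by folding: adding twice the top coefficient to every lane accounts for the reduction modulo Phi_n (subtracting coeffs[N-1] is the same as adding 2*coeffs[N-1] mod 3), and because 256, 16 and 4 are all congruent to 1 mod 3, adding the high part of a value onto its low part preserves the residue; the final vpsubw/vpsraw/vpandn/vpand/vpxor sequence is a branch-free conditional subtraction. A scalar model of one 16-bit lane (`mod3_fold` is an illustrative name):

```c
#include <assert.h>
#include <stdint.h>

/* Scalar model of the per-lane body of poly_mod_3_Phi_n. */
static uint16_t mod3_fold(uint16_t t) {
    uint32_t v = t;
    v = (v & 0xff) + (v >> 8);                       /* 256 == 1 (mod 3) */
    v = (v & 0x0f) + (v >> 4);                       /*  16 == 1 (mod 3) */
    v = (v & 0x03) + (v >> 2);                       /*   4 == 1 (mod 3) */
    v = (v & 0x03) + (v >> 2);                       /* now v <= 5       */
    uint32_t d = v - 3;                              /* wraps if v < 3   */
    uint32_t keep = (uint32_t)(-(int32_t)(d >> 31)); /* all-ones if v < 3 */
    return (uint16_t)((v & keep) | (d & ~keep));
}

int main(void) {
    for (uint32_t t = 0; t < 0x10000; t++) {
        assert(mod3_fold((uint16_t)t) == t % 3);
    }
    return 0;
}
```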
@@ -1,80 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n: | |||
vmovdqa 992(%rdi), %ymm0 | |||
vpermq $3, %ymm0, %ymm0 | |||
vpslld $16, %ymm0, %ymm0 | |||
vpsrld $16, %ymm0, %ymm1 | |||
vpor %ymm0, %ymm1, %ymm0 | |||
vbroadcastss %xmm0, %ymm0 | |||
vxorpd %ymm1, %ymm1, %ymm1 | |||
vpsubw %ymm0, %ymm1, %ymm0 | |||
vpaddw 0(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 0(%rdi) | |||
vpaddw 32(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 32(%rdi) | |||
vpaddw 64(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 64(%rdi) | |||
vpaddw 96(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 96(%rdi) | |||
vpaddw 128(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 128(%rdi) | |||
vpaddw 160(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 160(%rdi) | |||
vpaddw 192(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 192(%rdi) | |||
vpaddw 224(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 224(%rdi) | |||
vpaddw 256(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 256(%rdi) | |||
vpaddw 288(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 288(%rdi) | |||
vpaddw 320(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 320(%rdi) | |||
vpaddw 352(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 352(%rdi) | |||
vpaddw 384(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 384(%rdi) | |||
vpaddw 416(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 416(%rdi) | |||
vpaddw 448(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 448(%rdi) | |||
vpaddw 480(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 480(%rdi) | |||
vpaddw 512(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 512(%rdi) | |||
vpaddw 544(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 544(%rdi) | |||
vpaddw 576(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 576(%rdi) | |||
vpaddw 608(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 608(%rdi) | |||
vpaddw 640(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 640(%rdi) | |||
vpaddw 672(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 672(%rdi) | |||
vpaddw 704(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 704(%rdi) | |||
vpaddw 736(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 736(%rdi) | |||
vpaddw 768(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 768(%rdi) | |||
vpaddw 800(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 800(%rdi) | |||
vpaddw 832(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 832(%rdi) | |||
vpaddw 864(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 864(%rdi) | |||
vpaddw 896(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 896(%rdi) | |||
vpaddw 928(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 928(%rdi) | |||
vpaddw 960(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 960(%rdi) | |||
vpaddw 992(%rdi), %ymm0, %ymm1 | |||
vmovdqa %ymm1, 992(%rdi) | |||
ret |
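Modulo q the reduction by Phi_n is just a subtraction: the code broadcasts coeffs[N-1] (byte offset 1016, inside the 32-byte block loaded from 992), negates it, and adds it to every lane, so the top coefficient itself ends up zero. A scalar sketch of the same operation, assuming the caller masks with MODQ afterwards where needed (`mod_q_phi_n` is an illustrative name):

```c
#include <stdint.h>

#define NTRU_N 509

/* Scalar model of poly_mod_q_Phi_n: subtract the top coefficient from
 * every coefficient, i.e. reduce by Phi_n = 1 + x + ... + x^(N-1). */
static void mod_q_phi_n(uint16_t coeffs[NTRU_N]) {
    uint16_t last = coeffs[NTRU_N - 1];
    for (int i = 0; i < NTRU_N; i++) {
        coeffs[i] = (uint16_t)(coeffs[i] - last);   /* mod 2^16 covers mod q */
    }
}

int main(void) {
    uint16_t c[NTRU_N] = {0};
    c[0] = 5;
    c[NTRU_N - 1] = 3;
    mod_q_phi_n(c);
    /* after reduction the x^(N-1) coefficient is zero and c[0] == 2 */
    return (c[NTRU_N - 1] == 0 && c[0] == 2) ? 0 : 1;
}
```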
@@ -1,80 +0,0 @@ | |||
#include <immintrin.h> | |||
#include "poly_r2_inv.h" | |||
#include "poly.h" | |||
// Using pdep/pext for these two functions is faster but not a lot since they work on uint64_t which means | |||
// we can only do 4 coefficients at a time. Per byte (where we store 8 coefficients) we will thus need 2 pdeps/pexts | |||
// and an additional shift. In the case of tobytes we also need a logical or. | |||
// On AMD Ryzen pdep/pext are quite slow and the naive solution (looping through and setting each bit individually) | |||
// is preferred. | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_tobytes(unsigned char *out, const poly *a) { | |||
// Since pext works on a uint64_t we view the coefficient pointer as a 64-bit pointer | |||
// so that we can extract 4 coefficients at a time. It also makes arithmetic a little easier. | |||
uint64_t *coeff_pointer = (void *) a->coeffs; | |||
int i; | |||
for (i = 0; i < 63; i++) { | |||
out[i] = _pext_u64(coeff_pointer[2 * i], 0x1000100010001); | |||
out[i] |= _pext_u64(coeff_pointer[2 * i + 1], 0x1000100010001) << 4; | |||
} | |||
out[i] = _pext_u64(coeff_pointer[2 * 63], 0x1000100010001); | |||
out[i] |= _pext_u64(coeff_pointer[2 * 63 + 1], 0x1) << 4; | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_frombytes(poly *a, const unsigned char *in) { | |||
// Since pdep results in a uint64_t we view the coefficient pointer as a 64-bit pointer | |||
// so that we can store 4 coefficients at a time. It also makes arithmetic a little easier. | |||
uint64_t *coeff_pointer = (void *) a->coeffs; | |||
int i; | |||
for (i = 0; i < 63; i++) { | |||
coeff_pointer[2 * i] = _pdep_u64(in[i], 0x1000100010001); | |||
coeff_pointer[2 * i + 1] = _pdep_u64(in[i] >> 4, 0x1000100010001); | |||
} | |||
// From the last byte we only want 5 bits (since we have 509 total, not 512). | |||
coeff_pointer[2 * 63] = _pdep_u64(in[i], 0x1000100010001); | |||
coeff_pointer[2 * 63 + 1] = _pdep_u64(in[i] >> 4, 0x1); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv(poly *r, const poly *a) { | |||
union { | |||
unsigned char s[64]; | |||
__m256i s_x32[2]; | |||
} squares[13]; | |||
#define s(x) squares[(x)].s | |||
// This relies on the following addition chain: | |||
// 1, 2, 3, 6, 12, 15, 30, 60, 63, 126, 252, 504, 507 | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_tobytes(s(0), a); // TODO alignment | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(s(1), s(0)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(1), s(1), s(0)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(s(2), s(1)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(2), s(2), s(0)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(3), s(2)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(3), s(3), s(2)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_6_509(s(4), s(3)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(4), s(4), s(3)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(5), s(4)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(5), s(5), s(2)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_15_509(s(6), s(5)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(6), s(6), s(5)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_30_509(s(7), s(6)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(7), s(7), s(6)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(8), s(7)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(8), s(8), s(2)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_63_509(s(9), s(8)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(9), s(9), s(8)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_126_509(s(10), s(9)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(10), s(10), s(9)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_252_509(s(11), s(10)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(11), s(11), s(10)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(12), s(11)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(12), s(12), s(2)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(s(0), s(12)); | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_frombytes(r, s(0)); | |||
#undef s | |||
} |
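The chain of square_*_509/poly_R2_mul calls is an Itoh-Tsujii inversion: stage s(i) holds a^(2^k - 1) for the k listed in the addition-chain comment, and the final square_1_509 turns a^(2^507 - 1) into a^(2^508 - 2), which is the inverse of an invertible a because the relevant multiplicative order divides 2^508 - 1 (2 generates (Z/n)*, as the packing code notes). A bookkeeping sketch that replays only the exponents; the actual GF(2) arithmetic lives in the assembly routines:

```c
#include <assert.h>

/* Exponent bookkeeping for the Itoh-Tsujii chain used by poly_R2_inv:
 * stage(k) stands for a^(2^k - 1); squaring t times multiplies the
 * exponent by 2^t, and multiplying by stage(t) adds 2^t - 1, so
 * stage(k) squared t times and multiplied by stage(t) is stage(k + t),
 * since (2^k - 1) * 2^t + (2^t - 1) = 2^(k+t) - 1. */
static unsigned step(unsigned k, unsigned t) {
    return k + t;
}

int main(void) {
    unsigned k = 1;                        /* s(0) = a = stage(1) */
    k = step(k, 1);   assert(k == 2);      /* s(1) */
    k = step(k, 1);   assert(k == 3);      /* s(2), reused below */
    k = step(k, 3);   assert(k == 6);      /* s(3) */
    k = step(k, 6);   assert(k == 12);     /* s(4) */
    k = step(k, 3);   assert(k == 15);     /* s(5), multiplied by s(2) */
    k = step(k, 15);  assert(k == 30);     /* s(6) */
    k = step(k, 30);  assert(k == 60);     /* s(7) */
    k = step(k, 3);   assert(k == 63);     /* s(8), multiplied by s(2) */
    k = step(k, 63);  assert(k == 126);    /* s(9) */
    k = step(k, 126); assert(k == 252);    /* s(10) */
    k = step(k, 252); assert(k == 504);    /* s(11) */
    k = step(k, 3);   assert(k == 507);    /* s(12), multiplied by s(2) */
    /* one final squaring: a^(2*(2^507 - 1)) = a^(2^508 - 2) = a^(-1) */
    return 0;
}
```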
@@ -1,20 +0,0 @@ | |||
#ifndef POLY_R2_INV_H | |||
#define POLY_R2_INV_H | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_tobytes(unsigned char *out, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_frombytes(poly *a, const unsigned char *in); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_6_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_15_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_30_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_63_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_126_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_252_509(unsigned char *out, const unsigned char *a); | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(unsigned char *out, const unsigned char *a, | |||
const unsigned char *b); | |||
#endif |
@@ -1,285 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
mask1100: | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
mask0110: | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
mask0011: | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
mask1000: | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
mask0111: | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
.word 0 | |||
low253: | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 65535 | |||
.word 8191 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul: | |||
vmovdqa 0(%rsi), %ymm0 | |||
vmovdqa 32(%rsi), %ymm1 | |||
vmovdqa 0(%rdx), %ymm3 | |||
vmovdqa 32(%rdx), %ymm4 | |||
vpxor %ymm0, %ymm1, %ymm6 | |||
vpxor %ymm3, %ymm4, %ymm7 | |||
vextracti128 $1, %ymm0, %xmm11 | |||
vextracti128 $1, %ymm3, %xmm12 | |||
vpclmulqdq $1, %xmm11, %xmm12, %xmm5 | |||
vpclmulqdq $16, %xmm11, %xmm12, %xmm14 | |||
vpclmulqdq $17, %xmm11, %xmm12, %xmm15 | |||
vpxor %xmm5, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm11, %xmm12, %xmm5 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm5, %ymm5 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm5, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm5 | |||
vpxor %xmm0, %xmm11, %xmm11 | |||
vpxor %xmm3, %xmm12, %xmm12 | |||
vpclmulqdq $1, %xmm11, %xmm12, %xmm13 | |||
vpclmulqdq $16, %xmm11, %xmm12, %xmm14 | |||
vpclmulqdq $17, %xmm11, %xmm12, %xmm15 | |||
vpxor %xmm13, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm11, %xmm12, %xmm13 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm13, %ymm13 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm13, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm13 | |||
vpclmulqdq $1, %xmm0, %xmm3, %xmm2 | |||
vpclmulqdq $16, %xmm0, %xmm3, %xmm14 | |||
vpclmulqdq $17, %xmm0, %xmm3, %xmm15 | |||
vpxor %xmm2, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm0, %xmm3, %xmm2 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm2, %ymm2 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm2, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm2 | |||
vpxor %ymm13, %ymm5, %ymm13 | |||
vpxor %ymm13, %ymm2, %ymm13 | |||
vpxor %ymm11, %ymm11, %ymm11 | |||
vextracti128 $1, %ymm13, %xmm11 | |||
vpxor %ymm5, %ymm11, %ymm5 | |||
vpxor %ymm11, %ymm11, %ymm11 | |||
vinserti128 $1, %xmm13, %ymm11, %ymm11 | |||
vpxor %ymm11, %ymm2, %ymm2 | |||
vextracti128 $1, %ymm1, %xmm11 | |||
vextracti128 $1, %ymm4, %xmm12 | |||
vpclmulqdq $1, %xmm11, %xmm12, %xmm9 | |||
vpclmulqdq $16, %xmm11, %xmm12, %xmm14 | |||
vpclmulqdq $17, %xmm11, %xmm12, %xmm15 | |||
vpxor %xmm9, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm11, %xmm12, %xmm9 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm9, %ymm9 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm9, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm9 | |||
vpxor %xmm1, %xmm11, %xmm11 | |||
vpxor %xmm4, %xmm12, %xmm12 | |||
vpclmulqdq $1, %xmm11, %xmm12, %xmm13 | |||
vpclmulqdq $16, %xmm11, %xmm12, %xmm14 | |||
vpclmulqdq $17, %xmm11, %xmm12, %xmm15 | |||
vpxor %xmm13, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm11, %xmm12, %xmm13 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm13, %ymm13 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm13, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm13 | |||
vpclmulqdq $1, %xmm1, %xmm4, %xmm8 | |||
vpclmulqdq $16, %xmm1, %xmm4, %xmm14 | |||
vpclmulqdq $17, %xmm1, %xmm4, %xmm15 | |||
vpxor %xmm8, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm1, %xmm4, %xmm8 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm8, %ymm8 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm8, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm8 | |||
vpxor %ymm13, %ymm9, %ymm13 | |||
vpxor %ymm13, %ymm8, %ymm13 | |||
vpxor %ymm11, %ymm11, %ymm11 | |||
vextracti128 $1, %ymm13, %xmm11 | |||
vpxor %ymm9, %ymm11, %ymm9 | |||
vpxor %ymm11, %ymm11, %ymm11 | |||
vinserti128 $1, %xmm13, %ymm11, %ymm11 | |||
vpxor %ymm11, %ymm8, %ymm8 | |||
vextracti128 $1, %ymm6, %xmm11 | |||
vextracti128 $1, %ymm7, %xmm12 | |||
vpclmulqdq $1, %xmm11, %xmm12, %xmm1 | |||
vpclmulqdq $16, %xmm11, %xmm12, %xmm14 | |||
vpclmulqdq $17, %xmm11, %xmm12, %xmm15 | |||
vpxor %xmm1, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm11, %xmm12, %xmm1 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm1, %ymm1 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm1, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm1 | |||
vpxor %xmm6, %xmm11, %xmm11 | |||
vpxor %xmm7, %xmm12, %xmm12 | |||
vpclmulqdq $1, %xmm11, %xmm12, %xmm13 | |||
vpclmulqdq $16, %xmm11, %xmm12, %xmm14 | |||
vpclmulqdq $17, %xmm11, %xmm12, %xmm15 | |||
vpxor %xmm13, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm11, %xmm12, %xmm13 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm13, %ymm13 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm13, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm13 | |||
vpclmulqdq $1, %xmm6, %xmm7, %xmm0 | |||
vpclmulqdq $16, %xmm6, %xmm7, %xmm14 | |||
vpclmulqdq $17, %xmm6, %xmm7, %xmm15 | |||
vpxor %xmm0, %xmm14, %xmm14 | |||
vpclmulqdq $0, %xmm6, %xmm7, %xmm0 | |||
vpermq $16, %ymm14, %ymm14 | |||
vinserti128 $1, %xmm15, %ymm15, %ymm15 | |||
vpand mask0011(%rip), %ymm0, %ymm0 | |||
vpand mask0110(%rip), %ymm14, %ymm14 | |||
vpand mask1100(%rip), %ymm15, %ymm15 | |||
vpxor %ymm0, %ymm14, %ymm14 | |||
vpxor %ymm14, %ymm15, %ymm0 | |||
vpxor %ymm13, %ymm1, %ymm13 | |||
vpxor %ymm13, %ymm0, %ymm13 | |||
vpxor %ymm11, %ymm11, %ymm11 | |||
vextracti128 $1, %ymm13, %xmm11 | |||
vpxor %ymm1, %ymm11, %ymm1 | |||
vpxor %ymm11, %ymm11, %ymm11 | |||
vinserti128 $1, %xmm13, %ymm11, %ymm11 | |||
vpxor %ymm11, %ymm0, %ymm0 | |||
vpxor %ymm0, %ymm2, %ymm0 | |||
vpxor %ymm0, %ymm8, %ymm0 | |||
vpxor %ymm1, %ymm5, %ymm1 | |||
vpxor %ymm1, %ymm9, %ymm1 | |||
vpxor %ymm0, %ymm5, %ymm5 | |||
vpxor %ymm1, %ymm8, %ymm8 | |||
vpand mask1000(%rip), %ymm5, %ymm13 | |||
vpand mask0111(%rip), %ymm8, %ymm12 | |||
vpxor %ymm12, %ymm13, %ymm12 | |||
vpsrlq $61, %ymm12, %ymm12 | |||
vpermq $147, %ymm12, %ymm12 | |||
vpxor %ymm12, %ymm2, %ymm2 | |||
vpsllq $3, %ymm8, %ymm12 | |||
vpxor %ymm12, %ymm2, %ymm2 | |||
vpand mask1000(%rip), %ymm8, %ymm13 | |||
vpand mask0111(%rip), %ymm9, %ymm12 | |||
vpxor %ymm12, %ymm13, %ymm12 | |||
vpsrlq $61, %ymm12, %ymm12 | |||
vpermq $147, %ymm12, %ymm12 | |||
vpxor %ymm12, %ymm5, %ymm5 | |||
vpsllq $3, %ymm9, %ymm12 | |||
vpxor %ymm12, %ymm5, %ymm5 | |||
vpand low253(%rip), %ymm5, %ymm5 | |||
vmovdqa %ymm2, 0(%rdi) | |||
vmovdqa %ymm5, 32(%rdi) | |||
ret |
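poly_R2_mul multiplies two 509-coefficient GF(2) polynomials held in 64 bytes each: vpclmulqdq is a 64x64-bit carry-less multiplication, the surrounding xors appear to implement two levels of Karatsuba over 128- and 64-bit halves, and the trailing shift/permute/mask steps fold the high part of the product back onto the low bits, with low253 keeping 253 bits in the upper register (256 + 253 = 509 coefficients in total). A scalar reference for the carry-less primitive, as a sketch (`clmul64` is an illustrative name):

```c
#include <assert.h>
#include <stdint.h>

/* Scalar reference for the primitive behind vpclmulqdq: carry-less
 * (GF(2)[x]) multiplication of two 64-bit polynomials into 128 bits,
 * returned as a low/high pair. */
typedef struct { uint64_t lo, hi; } clmul128;

static clmul128 clmul64(uint64_t a, uint64_t b) {
    clmul128 r = {0, 0};
    for (int i = 0; i < 64; i++) {
        if ((b >> i) & 1) {
            r.lo ^= a << i;
            if (i) {
                r.hi ^= a >> (64 - i);
            }
        }
    }
    return r;
}

int main(void) {
    /* (x^3 + x + 1) * (x^2 + 1) = x^5 + x^2 + x + 1 over GF(2) */
    clmul128 r = clmul64(0xb, 0x5);
    assert(r.lo == 0x27 && r.hi == 0);
    return 0;
}
```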
@@ -1,840 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
mask_modq: | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
.word 2047 | |||
mask_ff: | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
mask_f: | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
mask_3: | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3 | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3 | |||
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3: | |||
vmovdqa mask_modq(%rip), %ymm6 | |||
vmovdqa 992(%rsi), %ymm5 | |||
vpand %ymm6, %ymm5, %ymm5 | |||
vpermq $3, %ymm5, %ymm5 | |||
vpslld $16, %ymm5, %ymm1 | |||
vpsrld $16, %ymm1, %ymm5 | |||
vpor %ymm5, %ymm1, %ymm5 | |||
vbroadcastss %xmm5, %ymm5 | |||
vpsrlw $10, %ymm5, %ymm1 | |||
vpaddw %ymm5, %ymm1, %ymm5 | |||
vpsrlw $8, %ymm5, %ymm1 | |||
vpand mask_ff(%rip), %ymm5, %ymm5 | |||
vpaddw %ymm1, %ymm5, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm5 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm5, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm5 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm5, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm5 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm5, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm5 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm5, %ymm1 | |||
vpsllw $1, %ymm1, %ymm5 | |||
vmovdqa 0(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 0(%rdi) | |||
vmovdqa 32(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 32(%rdi) | |||
vmovdqa 64(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 64(%rdi) | |||
vmovdqa 96(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 96(%rdi) | |||
vmovdqa 128(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 128(%rdi) | |||
vmovdqa 160(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 160(%rdi) | |||
vmovdqa 192(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 192(%rdi) | |||
vmovdqa 224(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 224(%rdi) | |||
vmovdqa 256(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 256(%rdi) | |||
vmovdqa 288(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 288(%rdi) | |||
vmovdqa 320(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 320(%rdi) | |||
vmovdqa 352(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 352(%rdi) | |||
vmovdqa 384(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 384(%rdi) | |||
vmovdqa 416(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 416(%rdi) | |||
vmovdqa 448(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 448(%rdi) | |||
vmovdqa 480(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 480(%rdi) | |||
vmovdqa 512(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 512(%rdi) | |||
vmovdqa 544(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 544(%rdi) | |||
vmovdqa 576(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 576(%rdi) | |||
vmovdqa 608(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 608(%rdi) | |||
vmovdqa 640(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 640(%rdi) | |||
vmovdqa 672(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 672(%rdi) | |||
vmovdqa 704(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 704(%rdi) | |||
vmovdqa 736(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 736(%rdi) | |||
vmovdqa 768(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 768(%rdi) | |||
vmovdqa 800(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 800(%rdi) | |||
vmovdqa 832(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 832(%rdi) | |||
vmovdqa 864(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 864(%rdi) | |||
vmovdqa 896(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 896(%rdi) | |||
vmovdqa 928(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 928(%rdi) | |||
vmovdqa 960(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 960(%rdi) | |||
vmovdqa 992(%rsi), %ymm0 | |||
vpand %ymm6, %ymm0, %ymm0 | |||
vpsrlw $10, %ymm0, %ymm1 | |||
vpaddw %ymm0, %ymm1, %ymm0 | |||
vpaddw %ymm0, %ymm5, %ymm0 | |||
vpsrlw $8, %ymm0, %ymm1 | |||
vpand mask_ff(%rip), %ymm0, %ymm0 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_f(%rip), %ymm1, %ymm0 | |||
vpsrlw $4, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpand mask_3(%rip), %ymm1, %ymm0 | |||
vpsrlw $2, %ymm1, %ymm1 | |||
vpaddw %ymm1, %ymm0, %ymm1 | |||
vpsubw mask_3(%rip), %ymm1, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm0 | |||
vpand %ymm15, %ymm1, %ymm14 | |||
vpxor %ymm14, %ymm0, %ymm1 | |||
vmovdqa %ymm1, 992(%rdi) | |||
ret |
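Note on the removed `poly_Rq_to_S3` assembly above: each 16-bit lane reduces an 11-bit coefficient mod 3 by folding digit groups (256, 16 and 4 are all congruent to 1 mod 3, so summing the groups preserves the residue). Before the folds, the `vpsrlw $10` / `vpaddw` pair adds 1 for coefficients >= q/2 (the centered lift, since -2048 is congruent to 1 mod 3), and the broadcast term built from the last coefficient accounts for reduction mod Phi_n. A scalar sketch of the per-lane arithmetic, written here for illustration and not taken from the removed sources:

```c
#include <stdint.h>

/* Reduce a 16-bit value mod 3 without division. The folds work because
 * 256, 16 and 4 are all congruent to 1 mod 3; the final step is a
 * branch-free conditional subtraction of 3. */
static uint16_t mod3(uint16_t a) {
    uint16_t r;
    int16_t t, c;

    r = (uint16_t)((a >> 8) + (a & 0xff)); /* 256 = 3*85 + 1 */
    r = (uint16_t)((r >> 4) + (r & 0xf));  /*  16 = 3*5  + 1 */
    r = (uint16_t)((r >> 2) + (r & 0x3));  /*   4 = 3*1  + 1 */
    r = (uint16_t)((r >> 2) + (r & 0x3));

    t = (int16_t)(r - 3);
    c = t >> 15;                    /* all ones if r < 3, else zero */
    return (uint16_t)((c & r) ^ (~c & t));
}
```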
@@ -1,463 +0,0 @@ | |||
#include "poly.h" | |||
#include <immintrin.h> | |||
typedef signed char small; | |||
#define p 508 | |||
#define ppad 512 | |||
#define numvec 2 | |||
typedef __m256i vec256; | |||
/* | |||
This code stores 512-coeff poly as vec256[2]. | |||
Order of 256 coefficients in each vec256 | |||
is optimized in light of costs of vector instructions: | |||
0,4,...,252 in 64-bit word; | |||
1,5,...,253 in 64-bit word; | |||
2,6,...,254 in 64-bit word; | |||
3,7,...,255 in 64-bit word. | |||
*/ | |||
static inline void vec256_frombits(vec256 *v, const small *b) { | |||
int i; | |||
for (i = 0; i < numvec; ++i) { | |||
vec256 b0 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; /* 0,1,...,31 */ | |||
vec256 b1 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; /* 32,33,... */ | |||
vec256 b2 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; | |||
vec256 b3 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; | |||
vec256 b4 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; | |||
vec256 b5 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; | |||
vec256 b6 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; | |||
vec256 b7 = _mm256_loadu_si256((vec256 *) b); | |||
b += 32; | |||
vec256 c0 = _mm256_unpacklo_epi32(b0, b1); /* 0 1 2 3 32 33 34 35 4 5 6 7 36 37 38 39 ... 55 */ | |||
vec256 c1 = _mm256_unpackhi_epi32(b0, b1); /* 8 9 10 11 40 41 42 43 ... 63 */ | |||
vec256 c2 = _mm256_unpacklo_epi32(b2, b3); | |||
vec256 c3 = _mm256_unpackhi_epi32(b2, b3); | |||
vec256 c4 = _mm256_unpacklo_epi32(b4, b5); | |||
vec256 c5 = _mm256_unpackhi_epi32(b4, b5); | |||
vec256 c6 = _mm256_unpacklo_epi32(b6, b7); | |||
vec256 c7 = _mm256_unpackhi_epi32(b6, b7); | |||
vec256 d0 = c0 | _mm256_slli_epi32(c1, 2); /* 0 8, 1 9, 2 10, 3 11, 32 40, 33 41, ..., 55 63 */ | |||
vec256 d2 = c2 | _mm256_slli_epi32(c3, 2); | |||
vec256 d4 = c4 | _mm256_slli_epi32(c5, 2); | |||
vec256 d6 = c6 | _mm256_slli_epi32(c7, 2); | |||
vec256 e0 = _mm256_unpacklo_epi64(d0, d2); | |||
vec256 e2 = _mm256_unpackhi_epi64(d0, d2); | |||
vec256 e4 = _mm256_unpacklo_epi64(d4, d6); | |||
vec256 e6 = _mm256_unpackhi_epi64(d4, d6); | |||
vec256 f0 = e0 | _mm256_slli_epi32(e2, 1); | |||
vec256 f4 = e4 | _mm256_slli_epi32(e6, 1); | |||
vec256 g0 = _mm256_permute2x128_si256(f0, f4, 0x20); | |||
vec256 g4 = _mm256_permute2x128_si256(f0, f4, 0x31); | |||
vec256 h = g0 | _mm256_slli_epi32(g4, 4); | |||
#define TRANSPOSE _mm256_set_epi8( 31,27,23,19, 30,26,22,18, 29,25,21,17, 28,24,20,16, 15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0 ) | |||
h = _mm256_shuffle_epi8(h, TRANSPOSE); | |||
h = _mm256_permute4x64_epi64(h, 0xd8); | |||
h = _mm256_shuffle_epi32(h, 0xd8); | |||
*v++ = h; | |||
} | |||
} | |||
static inline void vec256_tobits(const vec256 *v, small *b) { | |||
int i; | |||
for (i = 0; i < numvec; ++i) { | |||
vec256 h = *v++; | |||
h = _mm256_shuffle_epi32(h, 0xd8); | |||
h = _mm256_permute4x64_epi64(h, 0xd8); | |||
h = _mm256_shuffle_epi8(h, TRANSPOSE); | |||
vec256 g0 = h & _mm256_set1_epi8(15); | |||
vec256 g4 = _mm256_srli_epi32(h, 4) & _mm256_set1_epi8(15); | |||
vec256 f0 = _mm256_permute2x128_si256(g0, g4, 0x20); | |||
vec256 f4 = _mm256_permute2x128_si256(g0, g4, 0x31); | |||
vec256 e0 = f0 & _mm256_set1_epi8(5); | |||
vec256 e2 = _mm256_srli_epi32(f0, 1) & _mm256_set1_epi8(5); | |||
vec256 e4 = f4 & _mm256_set1_epi8(5); | |||
vec256 e6 = _mm256_srli_epi32(f4, 1) & _mm256_set1_epi8(5); | |||
vec256 d0 = _mm256_unpacklo_epi32(e0, e2); | |||
vec256 d2 = _mm256_unpackhi_epi32(e0, e2); | |||
vec256 d4 = _mm256_unpacklo_epi32(e4, e6); | |||
vec256 d6 = _mm256_unpackhi_epi32(e4, e6); | |||
vec256 c0 = d0 & _mm256_set1_epi8(1); | |||
vec256 c1 = _mm256_srli_epi32(d0, 2) & _mm256_set1_epi8(1); | |||
vec256 c2 = d2 & _mm256_set1_epi8(1); | |||
vec256 c3 = _mm256_srli_epi32(d2, 2) & _mm256_set1_epi8(1); | |||
vec256 c4 = d4 & _mm256_set1_epi8(1); | |||
vec256 c5 = _mm256_srli_epi32(d4, 2) & _mm256_set1_epi8(1); | |||
vec256 c6 = d6 & _mm256_set1_epi8(1); | |||
vec256 c7 = _mm256_srli_epi32(d6, 2) & _mm256_set1_epi8(1); | |||
vec256 b0 = _mm256_unpacklo_epi64(c0, c1); | |||
vec256 b1 = _mm256_unpackhi_epi64(c0, c1); | |||
vec256 b2 = _mm256_unpacklo_epi64(c2, c3); | |||
vec256 b3 = _mm256_unpackhi_epi64(c2, c3); | |||
vec256 b4 = _mm256_unpacklo_epi64(c4, c5); | |||
vec256 b5 = _mm256_unpackhi_epi64(c4, c5); | |||
vec256 b6 = _mm256_unpacklo_epi64(c6, c7); | |||
vec256 b7 = _mm256_unpackhi_epi64(c6, c7); | |||
_mm256_storeu_si256((vec256 *) b, b0); | |||
b += 32; | |||
_mm256_storeu_si256((vec256 *) b, b1); | |||
b += 32; | |||
_mm256_storeu_si256((vec256 *) b, b2); | |||
b += 32; | |||
_mm256_storeu_si256((vec256 *) b, b3); | |||
b += 32; | |||
_mm256_storeu_si256((vec256 *) b, b4); | |||
b += 32; | |||
_mm256_storeu_si256((vec256 *) b, b5); | |||
b += 32; | |||
_mm256_storeu_si256((vec256 *) b, b6); | |||
b += 32; | |||
_mm256_storeu_si256((vec256 *) b, b7); | |||
b += 32; | |||
} | |||
} | |||
static void vec256_init(vec256 *G0, vec256 *G1, const small *s) { | |||
int i; | |||
small srev[ppad + (ppad - p)]; | |||
small si; | |||
small g0[ppad]; | |||
small g1[ppad]; | |||
for (i = 0; i < p; ++i) { | |||
srev[ppad - 1 - i] = s[i]; | |||
} | |||
for (i = 0; i < ppad - p; ++i) { | |||
srev[i] = 0; | |||
} | |||
for (i = p; i < ppad; ++i) { | |||
srev[i + ppad - p] = 0; | |||
} | |||
for (i = 0; i < ppad; ++i) { | |||
si = srev[i + ppad - p]; | |||
g0[i] = si & 1; | |||
g1[i] = (si >> 1) & g0[i]; | |||
} | |||
vec256_frombits(G0, g0); | |||
vec256_frombits(G1, g1); | |||
} | |||
static void vec256_final(small *out, const vec256 *V0, const vec256 *V1) { | |||
int i; | |||
small v0[ppad]; | |||
small v1[ppad]; | |||
small v[ppad]; | |||
small vrev[ppad + (ppad - p)]; | |||
vec256_tobits(V0, v0); | |||
vec256_tobits(V1, v1); | |||
for (i = 0; i < ppad; ++i) { | |||
v[i] = v0[i] + 2 * v1[i] - 4 * (v0[i] & v1[i]); | |||
} | |||
for (i = 0; i < ppad; ++i) { | |||
vrev[i] = v[ppad - 1 - i]; | |||
} | |||
for (i = ppad; i < ppad + (ppad - p); ++i) { | |||
vrev[i] = 0; | |||
} | |||
for (i = 0; i < p; ++i) { | |||
out[i] = vrev[i + ppad - p]; | |||
} | |||
} | |||
static inline int negative_mask(int x) { | |||
return x >> 31; | |||
} | |||
static inline void vec256_swap(vec256 *f, vec256 *g, int len, vec256 mask) { | |||
vec256 flip; | |||
int i; | |||
for (i = 0; i < len; ++i) { | |||
flip = mask & (f[i] ^ g[i]); | |||
f[i] ^= flip; | |||
g[i] ^= flip; | |||
} | |||
} | |||
static inline void vec256_scale(vec256 *f0, vec256 *f1, const vec256 c0, const vec256 c1) { | |||
int i; | |||
for (i = 0; i < numvec; ++i) { | |||
vec256 f0i = f0[i]; | |||
vec256 f1i = f1[i]; | |||
f0i &= c0; | |||
f1i ^= c1; | |||
f1i &= f0i; | |||
f0[i] = f0i; | |||
f1[i] = f1i; | |||
} | |||
} | |||
static inline void vec256_eliminate(vec256 *f0, vec256 *f1, vec256 *g0, vec256 *g1, int len, const vec256 c0, const vec256 c1) { | |||
int i; | |||
for (i = 0; i < len; ++i) { | |||
vec256 f0i = f0[i]; | |||
vec256 f1i = f1[i]; | |||
vec256 g0i = g0[i]; | |||
vec256 g1i = g1[i]; | |||
vec256 t; | |||
f0i &= c0; | |||
f1i ^= c1; | |||
f1i &= f0i; | |||
t = g0i ^ f0i; | |||
g0[i] = t | (g1i ^ f1i); | |||
g1[i] = (g1i ^ f0i) & (f1i ^ t); | |||
} | |||
} | |||
static inline int vec256_bit0mask(vec256 *f) { | |||
return -(_mm_cvtsi128_si32(_mm256_castsi256_si128(f[0])) & 1); | |||
} | |||
static inline void vec256_divx_1(vec256 *f) { | |||
vec256 f0 = f[0]; | |||
unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0)); | |||
low0 = low0 >> 1; | |||
f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3); | |||
f[0] = _mm256_permute4x64_epi64(f0, 0x39); | |||
} | |||
static inline void vec256_divx_2(vec256 *f) { | |||
vec256 f0 = f[0]; | |||
vec256 f1 = f[1]; | |||
unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0)); | |||
unsigned long long low1 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f1)); | |||
low0 = (low0 >> 1) | (low1 << 63); | |||
low1 = low1 >> 1; | |||
f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3); | |||
f1 = _mm256_blend_epi32(f1, _mm256_set_epi64x(0, 0, 0, low1), 0x3); | |||
f[0] = _mm256_permute4x64_epi64(f0, 0x39); | |||
f[1] = _mm256_permute4x64_epi64(f1, 0x39); | |||
} | |||
static inline void vec256_timesx_1(vec256 *f) { | |||
vec256 f0 = _mm256_permute4x64_epi64(f[0], 0x93); | |||
unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0)); | |||
low0 = low0 << 1; | |||
f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3); | |||
f[0] = f0; | |||
} | |||
static inline void vec256_timesx_2(vec256 *f) { | |||
vec256 f0 = _mm256_permute4x64_epi64(f[0], 0x93); | |||
vec256 f1 = _mm256_permute4x64_epi64(f[1], 0x93); | |||
unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0)); | |||
unsigned long long low1 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f1)); | |||
low1 = (low1 << 1) | (low0 >> 63); | |||
low0 = low0 << 1; | |||
f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3); | |||
f1 = _mm256_blend_epi32(f1, _mm256_set_epi64x(0, 0, 0, low1), 0x3); | |||
f[0] = f0; | |||
f[1] = f1; | |||
} | |||
static int __poly_S3_inv(unsigned char *outbytes, const unsigned char *inbytes) { | |||
small *out = (void *) outbytes; | |||
small *in = (void *) inbytes; | |||
vec256 F0[numvec]; | |||
vec256 F1[numvec]; | |||
vec256 G0[numvec]; | |||
vec256 G1[numvec]; | |||
vec256 V0[numvec]; | |||
vec256 V1[numvec]; | |||
vec256 R0[numvec]; | |||
vec256 R1[numvec]; | |||
vec256 c0vec, c1vec; | |||
int loop; | |||
int c0, c1; | |||
int minusdelta = -1; | |||
int swapmask; | |||
vec256 swapvec; | |||
vec256_init(G0, G1, in); | |||
F0[0] = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1); | |||
F0[1] = _mm256_set_epi32(2147483647, -1, 2147483647, -1, 2147483647, -1, -1, -1); | |||
F1[0] = _mm256_set1_epi32(0); | |||
F1[1] = _mm256_set1_epi32(0); | |||
V0[0] = _mm256_set1_epi32(0); | |||
V1[0] = _mm256_set1_epi32(0); | |||
V0[1] = _mm256_set1_epi32(0); | |||
V1[1] = _mm256_set1_epi32(0); | |||
R0[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, 1); | |||
R1[0] = _mm256_set1_epi32(0); | |||
R0[1] = _mm256_set1_epi32(0); | |||
R1[1] = _mm256_set1_epi32(0); | |||
for (loop = 256; loop > 0; --loop) { | |||
vec256_timesx_1(V0); | |||
vec256_timesx_1(V1); | |||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0); | |||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0); | |||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1); | |||
c1 &= c0; | |||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta); | |||
minusdelta -= 1; | |||
swapvec = _mm256_set1_epi32(swapmask); | |||
vec256_swap(F0, G0, 2, swapvec); | |||
vec256_swap(F1, G1, 2, swapvec); | |||
c0vec = _mm256_set1_epi32(c0); | |||
c1vec = _mm256_set1_epi32(c1); | |||
vec256_eliminate(F0, F1, G0, G1, 2, c0vec, c1vec); | |||
vec256_divx_2(G0); | |||
vec256_divx_2(G1); | |||
vec256_swap(V0, R0, 1, swapvec); | |||
vec256_swap(V1, R1, 1, swapvec); | |||
vec256_eliminate(V0, V1, R0, R1, 1, c0vec, c1vec); | |||
} | |||
for (loop = 503; loop > 0; --loop) { | |||
vec256_timesx_2(V0); | |||
vec256_timesx_2(V1); | |||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0); | |||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0); | |||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1); | |||
c1 &= c0; | |||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta); | |||
minusdelta -= 1; | |||
swapvec = _mm256_set1_epi32(swapmask); | |||
vec256_swap(F0, G0, 2, swapvec); | |||
vec256_swap(F1, G1, 2, swapvec); | |||
c0vec = _mm256_set1_epi32(c0); | |||
c1vec = _mm256_set1_epi32(c1); | |||
vec256_eliminate(F0, F1, G0, G1, 2, c0vec, c1vec); | |||
vec256_divx_2(G0); | |||
vec256_divx_2(G1); | |||
vec256_swap(V0, R0, 2, swapvec); | |||
vec256_swap(V1, R1, 2, swapvec); | |||
vec256_eliminate(V0, V1, R0, R1, 2, c0vec, c1vec); | |||
} | |||
for (loop = 256; loop > 0; --loop) { | |||
vec256_timesx_2(V0); | |||
vec256_timesx_2(V1); | |||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0); | |||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0); | |||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1); | |||
c1 &= c0; | |||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta); | |||
minusdelta -= 1; | |||
swapvec = _mm256_set1_epi32(swapmask); | |||
vec256_swap(F0, G0, 1, swapvec); | |||
vec256_swap(F1, G1, 1, swapvec); | |||
c0vec = _mm256_set1_epi32(c0); | |||
c1vec = _mm256_set1_epi32(c1); | |||
vec256_eliminate(F0, F1, G0, G1, 1, c0vec, c1vec); | |||
vec256_divx_1(G0); | |||
vec256_divx_1(G1); | |||
vec256_swap(V0, R0, 2, swapvec); | |||
vec256_swap(V1, R1, 2, swapvec); | |||
vec256_eliminate(V0, V1, R0, R1, 2, c0vec, c1vec); | |||
} | |||
c0vec = _mm256_set1_epi32(vec256_bit0mask(F0)); | |||
c1vec = _mm256_set1_epi32(vec256_bit0mask(F1)); | |||
vec256_scale(V0, V1, c0vec, c1vec); | |||
vec256_final(out, V0, V1); | |||
out[p] = negative_mask(minusdelta); | |||
return 0; | |||
} | |||
// This code is based on crypto_core/invhrss701/faster from SUPERCOP. The code was written as a case study | |||
// for the paper "Fast constant-time gcd computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang. | |||
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_inv(poly *r_out, const poly *a) { | |||
const unsigned char *in = (void *) a; | |||
unsigned char *out = (void *) r_out; | |||
small input[ppad]; | |||
small output[ppad]; | |||
int i; | |||
/* XXX: obviously input/output format should be packed into bytes */ | |||
for (i = 0; i < p; ++i) { | |||
small x = in[2 * i] & 3; /* 0 1 2 3 */ | |||
x += 1; /* 0 1 2 3 4 5 6, offset by 1 */ | |||
x &= (x - 3) >> 5; /* 0 1 2, offset by 1 */ | |||
input[i] = x - 1; | |||
} | |||
/* XXX: merge with vec256_init */ | |||
__poly_S3_inv((unsigned char *)output, (unsigned char *)input); | |||
for (i = 0; i < p; ++i) { | |||
out[2 * i] = (3 & output[i]) ^ ((3 & output[i]) >> 1); | |||
out[2 * i + 1] = 0; | |||
} | |||
} |
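The removed `poly_s3_inv.c` above follows the constant-time divstep iteration from the Bernstein-Yang paper cited in its final comment: every data-dependent decision is turned into an all-ones/all-zeros mask (`negative_mask`, `vec256_bit0mask`) and applied with AND/XOR (`vec256_swap`, `vec256_eliminate`) rather than with a branch. A minimal scalar sketch of that masking pattern, for illustration only (the helper names below are not from the removed file):

```c
#include <stdint.h>

/* All-ones mask when x is negative, zero otherwise. Like the removed
 * code, this relies on an arithmetic right shift of a signed value. */
static int32_t negative_mask32(int32_t x) {
    return x >> 31;
}

/* Branch-free conditional swap: mask == 0 keeps both values,
 * mask == -1 exchanges them. */
static void cond_swap32(int32_t *a, int32_t *b, int32_t mask) {
    int32_t flip = mask & (*a ^ *b);
    *a ^= flip;
    *b ^= flip;
}
```

In the loops above, `swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0)` plays the role of `mask`, broadcast across a whole register with `_mm256_set1_epi32` before being fed to `vec256_swap` and `vec256_eliminate`.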
@@ -1,45 +0,0 @@ | |||
#include "sample.h" | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) { | |||
PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(f, uniformbytes); | |||
PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(g, uniformbytes + NTRU_SAMPLE_IID_BYTES); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]) { | |||
PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(r, uniformbytes); | |||
PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(m, uniformbytes + NTRU_SAMPLE_IID_BYTES); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(poly *r, const unsigned char u[NTRU_SAMPLE_FT_BYTES]) { | |||
// Assumes NTRU_SAMPLE_FT_BYTES = ceil(30*(n-1)/8) | |||
int32_t s[NTRU_N - 1]; | |||
int i; | |||
// Use 30 bits of u per word | |||
for (i = 0; i < (NTRU_N - 1) / 4; i++) { | |||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26); | |||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28); | |||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30); | |||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24); | |||
} | |||
for (i = 0; i < NTRU_WEIGHT / 2; i++) { | |||
s[i] |= 1; | |||
} | |||
for (i = NTRU_WEIGHT / 2; i < NTRU_WEIGHT; i++) { | |||
s[i] |= 2; | |||
} | |||
PQCLEAN_NTRUHPS2048509_AVX2_crypto_sort_int32(s, NTRU_N - 1); | |||
for (i = 0; i < NTRU_N - 1; i++) { | |||
r->coeffs[i] = ((uint16_t) (s[i] & 3)); | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} |
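`sample_fixed_type` above draws a fixed-weight ternary polynomial by packing a 30-bit random key into the high bits of each word, forcing the low two bits of the first `NTRU_WEIGHT/2` slots to 1 and of the next `NTRU_WEIGHT/2` to 2, sorting the words with the constant-time `crypto_sort_int32`, and keeping only the low two bits. A simplified sketch of the idea (illustrative only: plain `qsort` instead of a constant-time sort, hypothetical helper names, unsigned words to sidestep sign handling):

```c
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define N_SLOTS 508                 /* NTRU_N - 1 for ntruhps2048509 */

static int cmp_u32(const void *a, const void *b) {
    uint32_t x, y;
    memcpy(&x, a, sizeof x);
    memcpy(&y, b, sizeof y);
    return (x > y) - (x < y);
}

/* rnd[i] are uniform 30-bit values; out[i] ends up in {0, 1, 2},
 * with exactly weight/2 ones and weight/2 twos in random positions. */
static void sample_fixed_weight(uint8_t out[N_SLOTS],
                                const uint32_t rnd[N_SLOTS], int weight) {
    uint32_t s[N_SLOTS];
    int i;

    for (i = 0; i < N_SLOTS; i++) {
        s[i] = rnd[i] << 2;         /* random sort key in bits 2..31 */
    }
    for (i = 0; i < weight / 2; i++) {
        s[i] |= 1;                  /* coefficient +1 */
    }
    for (i = weight / 2; i < weight; i++) {
        s[i] |= 2;                  /* coefficient -1, encoded as 2 */
    }
    qsort(s, N_SLOTS, sizeof s[0], cmp_u32);  /* random keys => random order */
    for (i = 0; i < N_SLOTS; i++) {
        out[i] = (uint8_t)(s[i] & 3);
    }
}
```

Sorting by uniformly random keys permutes the fixed values uniformly (up to key collisions), which is why the removed code only needs a sort plus bit masking to obtain a fixed-weight sample without secret-dependent branches.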
@@ -1,17 +0,0 @@ | |||
#ifndef SAMPLE_H | |||
#define SAMPLE_H | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "crypto_sort_int32.h" | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_FT_BYTES]); | |||
#endif |
@@ -1,21 +0,0 @@ | |||
#include <immintrin.h> | |||
#include "sample.h" | |||
extern void PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid(poly *r, const unsigned char uniformbytes[PAD32(NTRU_SAMPLE_IID_BYTES)]); | |||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { | |||
int i; | |||
union { /* align to 32 byte boundary for vmovdqa */ | |||
unsigned char b[PAD32(NTRU_SAMPLE_IID_BYTES)]; | |||
__m256i b_x32[PAD32(NTRU_SAMPLE_IID_BYTES) / 32]; | |||
} buffer; | |||
for (i = 0; i < NTRU_SAMPLE_IID_BYTES; i++) { | |||
buffer.b[i] = uniformbytes[i]; | |||
} | |||
for (i = NTRU_SAMPLE_IID_BYTES; i < PAD32(NTRU_SAMPLE_IID_BYTES); i++) { | |||
buffer.b[i] = 0; | |||
} | |||
PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid(r, buffer.b); | |||
} |
@@ -1,109 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_square_1_509 | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_square_1_509 | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_1_509: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_square_1_509: | |||
push %r15 | |||
push %r14 | |||
push %r13 | |||
push %r12 | |||
push %rbx | |||
push %rbp | |||
mov 0(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $-0x1, %r10 | |||
mov $0x5555555555555555, %rbp | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 0(%rdi) | |||
mov $0xffffffff00000000, %rbx | |||
pext %rbx, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 8(%rdi) | |||
mov 8(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $-0x1, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 16(%rdi) | |||
pext %rbx, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 24(%rdi) | |||
mov 16(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $-0x1, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 32(%rdi) | |||
pext %rbx, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 40(%rdi) | |||
mov 24(%rsi), %r11 | |||
mov %r11, %r10 | |||
rol $2, %r10 | |||
and $0x2, %r10 | |||
xor %r10, 0(%rdi) | |||
mov %r11, %r10 | |||
and $-0x1, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 48(%rdi) | |||
mov $0x7fffffff00000000, %r12 | |||
pext %r12, %r11, %r10 | |||
mov $0x1555555555555555, %r13 | |||
pdep %r13, %r10, %r10 | |||
mov %r10, 56(%rdi) | |||
mov 32(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $0x7fffffff, %r10 | |||
mov $0xaaaaaaaaaaaaaaa8, %r14 | |||
pdep %r14, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x7fffffff80000000, %r15 | |||
pext %r15, %r11, %r10 | |||
mov $0xaaaaaaaaaaaaaaaa, %r9 | |||
pdep %r9, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
mov %r11, %r10 | |||
rol $2, %r10 | |||
and $0x2, %r10 | |||
xor %r10, 16(%rdi) | |||
mov 40(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $0x7fffffff, %r10 | |||
pdep %r14, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %r9, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
mov %r11, %r10 | |||
rol $2, %r10 | |||
and $0x2, %r10 | |||
xor %r10, 32(%rdi) | |||
mov 48(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $0x7fffffff, %r10 | |||
pdep %r14, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %r9, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
mov %r11, %r10 | |||
rol $2, %r10 | |||
and $0x2, %r10 | |||
xor %r10, 48(%rdi) | |||
mov 56(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $0x7fffffff, %r10 | |||
pdep %r14, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x1fffffff80000000, %r8 | |||
pext %r8, %r11, %r10 | |||
mov $0xaaaaaaaaaaaaaaa, %rdx | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
pop %rbp | |||
pop %rbx | |||
pop %r12 | |||
pop %r13 | |||
pop %r14 | |||
pop %r15 | |||
ret |
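The `square_1_509` routine above, together with `square_3_509` and `square_6_509` below, implements repeated squaring of a polynomial over GF(2) (x -> x^(2^k)), which amounts to the coefficient permutation i -> 2^k * i mod 509. The `pext`/`pdep` pairs gather 32 source bits and scatter them with a fixed stride (mask `0x5555555555555555` for stride 2), while the small masks and rotates near the end patch up the wrap-around past coefficient 508. A minimal intrinsics sketch of the stride-2 spreading step (illustrative only, wrap-around omitted; requires BMI2):

```c
#include <stdint.h>
#include <immintrin.h>   /* _pdep_u64 (BMI2) */

/* Bit i of `in` becomes bit 2i of the output pair, i.e. one squaring
 * of a GF(2) polynomial restricted to a single 64-coefficient word. */
static void gf2_square_spread64(uint64_t in, uint64_t out[2]) {
    const uint64_t even_bits = 0x5555555555555555ULL;
    out[0] = _pdep_u64(in & 0xffffffffULL, even_bits);  /* low 32 bits  */
    out[1] = _pdep_u64(in >> 32, even_bits);            /* high 32 bits */
}
```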
@@ -1,272 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_square_3_509 | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_square_3_509 | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_square_3_509: | |||
push %r15 | |||
push %r14 | |||
push %r13 | |||
push %r12 | |||
push %rbx | |||
push %rbp | |||
mov 0(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $0xff, %r10 | |||
mov $0x101010101010101, %rbp | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 0(%rdi) | |||
mov $0xff00, %rbx | |||
pext %rbx, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 8(%rdi) | |||
mov $0xff0000, %r12 | |||
pext %r12, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 16(%rdi) | |||
mov $0xff000000, %r13 | |||
pext %r13, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 24(%rdi) | |||
mov $0xff00000000, %r14 | |||
pext %r14, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 32(%rdi) | |||
mov $0xff0000000000, %r15 | |||
pext %r15, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 40(%rdi) | |||
mov $0xff000000000000, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 48(%rdi) | |||
mov $0xff00000000000000, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
mov %r10, 56(%rdi) | |||
mov 8(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $0xff, %r10 | |||
mov $0x808080808080808, %rdx | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
pext %rbx, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r12, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r13, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r14, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
pext %r8, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 16(%rsi), %r11 | |||
mov $0x80000000000000ff, %rcx | |||
pext %rcx, %r11, %r10 | |||
mov $0x9010101010101010, %rax | |||
pdep %rax, %r10, %r10 | |||
rol $2, %r10 | |||
xor %r10, 0(%rdi) | |||
pext %rbx, %r11, %r10 | |||
mov $0x4040404040404040, %rbp | |||
pdep %rbp, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r12, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r13, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r14, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %rbp, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x7f00000000000000, %r8 | |||
pext %r8, %r11, %r10 | |||
mov $0x40404040404040, %rdx | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 24(%rsi), %r11 | |||
mov $0x800000000000007f, %rcx | |||
pext %rcx, %r11, %r10 | |||
mov $0x8010101010101010, %rax | |||
pdep %rax, %r10, %r10 | |||
rol $5, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x7f80, %rbx | |||
pext %rbx, %r11, %r10 | |||
mov $0x202020202020202, %r12 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
mov $0x7f8000, %r13 | |||
pext %r13, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
mov $0x7f800000, %r14 | |||
pext %r14, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
mov $0x7f80000000, %r15 | |||
pext %r15, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
mov $0x7f8000000000, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
mov $0x7f800000000000, %rbp | |||
pext %rbp, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x7f80000000000000, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 32(%rsi), %r11 | |||
pext %rcx, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
rol $8, %r10 | |||
xor %r10, 0(%rdi) | |||
pext %rbx, %r11, %r10 | |||
mov $0x1010101010101010, %rdx | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r13, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r14, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %rbp, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
pext %r8, %r11, %r10 | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 40(%rsi), %r11 | |||
mov $0xc00000000000007f, %r12 | |||
pext %r12, %r11, %r10 | |||
mov $0x8090101010101010, %rcx | |||
pdep %rcx, %r10, %r10 | |||
rol $11, %r10 | |||
xor %r10, 0(%rdi) | |||
pext %rbx, %r11, %r10 | |||
mov $0x8080808080808080, %rax | |||
pdep %rax, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r13, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r14, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %rbp, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x3f80000000000000, %r8 | |||
pext %r8, %r11, %r10 | |||
mov $0x80808080808080, %rdx | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 48(%rsi), %r11 | |||
mov $0xc00000000000003f, %r12 | |||
pext %r12, %r11, %r10 | |||
mov $0x8080101010101010, %rcx | |||
pdep %rcx, %r10, %r10 | |||
rol $14, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x3fc0, %rbx | |||
pext %rbx, %r11, %r10 | |||
mov $0x404040404040404, %r13 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
mov $0x3fc000, %r14 | |||
pext %r14, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
mov $0x3fc00000, %r15 | |||
pext %r15, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
mov $0x3fc0000000, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
mov $0x3fc000000000, %rbp | |||
pext %rbp, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
mov $0x3fc00000000000, %rax | |||
pext %rax, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x3fc0000000000000, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 56(%rsi), %r11 | |||
mov %r11, %r10 | |||
and $0x3f, %r10 | |||
mov $0x2020202020200000, %rdx | |||
pdep %rdx, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
pext %rbx, %r11, %r10 | |||
mov $0x2020202020202020, %r12 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r14, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %rbp, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %rax, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x1fc0000000000000, %rcx | |||
pext %rcx, %r11, %r10 | |||
mov $0x20202020202020, %r8 | |||
pdep %r8, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
pop %rbp | |||
pop %rbx | |||
pop %r12 | |||
pop %r13 | |||
pop %r14 | |||
pop %r15 | |||
ret |
@@ -1,296 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_square_6_509 | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_square_6_509 | |||
PQCLEAN_NTRUHPS2048509_AVX2_square_6_509: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_square_6_509: | |||
push %r15 | |||
push %r14 | |||
push %r13 | |||
push %r12 | |||
push %rbx | |||
push %rbp | |||
mov 0(%rsi), %r11 | |||
mov $0x101010101010101, %rbp | |||
pext %rbp, %r11, %r10 | |||
mov $0x249249, %rbx | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 0(%rdi) | |||
mov $0x202020202020202, %r12 | |||
pext %r12, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 8(%rdi) | |||
mov $0x404040404040404, %r13 | |||
pext %r13, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 16(%rdi) | |||
mov $0x808080808080808, %r14 | |||
pext %r14, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 24(%rdi) | |||
mov $0x1010101010101010, %r15 | |||
pext %r15, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 32(%rdi) | |||
mov $0x2020202020202020, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 40(%rdi) | |||
mov $0x4040404040404040, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 48(%rdi) | |||
mov $0x8080808080808080, %rdx | |||
pext %rdx, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
mov %r10, 56(%rdi) | |||
mov 8(%rsi), %r11 | |||
pext %rbp, %r11, %r10 | |||
mov $0x249249000000, %rcx | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
pext %r12, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r13, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r14, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %r8, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
pext %rdx, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 16(%rsi), %r11 | |||
mov $0x8080810101010101, %rax | |||
pext %rax, %r11, %r10 | |||
mov $0x9249248000000000, %rbx | |||
pdep %rbx, %r10, %r10 | |||
rol $9, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x101020202020202, %rbp | |||
pext %rbp, %r11, %r10 | |||
mov $0x9249240000000000, %r12 | |||
pdep %r12, %r10, %r10 | |||
rol $6, %r10 | |||
xor %r10, 8(%rdi) | |||
mov $0x202040404040404, %r13 | |||
pext %r13, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
rol $6, %r10 | |||
xor %r10, 16(%rdi) | |||
mov $0x404080808080808, %r14 | |||
pext %r14, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
rol $6, %r10 | |||
xor %r10, 24(%rdi) | |||
mov $0x808101010101010, %r15 | |||
pext %r15, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
rol $6, %r10 | |||
xor %r10, 32(%rdi) | |||
mov $0x1010202020202020, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
rol $6, %r10 | |||
xor %r10, 40(%rdi) | |||
mov $0x2020404040404040, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %r12, %r10, %r10 | |||
rol $6, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x4040008080808080, %rdx | |||
pext %rdx, %r11, %r10 | |||
mov $0x9049240000000000, %rcx | |||
pdep %rcx, %r10, %r10 | |||
rol $6, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 24(%rsi), %r11 | |||
mov $0x8080808080808080, %rax | |||
pext %rax, %r11, %r10 | |||
mov $0x124924800, %rbx | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x101010101010101, %rbp | |||
pext %rbp, %r11, %r10 | |||
mov $0x24924900, %r13 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
mov $0x202020202020202, %r14 | |||
pext %r14, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
mov $0x404040404040404, %r15 | |||
pext %r15, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
mov $0x808080808080808, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
mov $0x1010101010101010, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
mov $0x2020202020202020, %r12 | |||
pext %r12, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x4040404040404040, %rdx | |||
pext %rdx, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 32(%rsi), %r11 | |||
pext %rax, %r11, %r10 | |||
mov $0x124924800000000, %rcx | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
pext %rbp, %r11, %r10 | |||
mov $0x24924900000000, %rbx | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r14, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r15, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r8, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %r12, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
pext %rdx, %r11, %r10 | |||
pdep %rbx, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 40(%rsi), %r11 | |||
mov $0x4040404040408080, %r13 | |||
pext %r13, %r11, %r10 | |||
mov $0x9249240000000000, %rax | |||
pdep %rax, %r10, %r10 | |||
rol $17, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x8080808080810101, %rcx | |||
pext %rcx, %r11, %r10 | |||
mov $0x9249248000000000, %rbp | |||
pdep %rbp, %r10, %r10 | |||
rol $17, %r10 | |||
xor %r10, 8(%rdi) | |||
mov $0x101010101020202, %r14 | |||
pext %r14, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
rol $14, %r10 | |||
xor %r10, 16(%rdi) | |||
mov $0x202020202040404, %r15 | |||
pext %r15, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
rol $14, %r10 | |||
xor %r10, 24(%rdi) | |||
mov $0x404040404080808, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
rol $14, %r10 | |||
xor %r10, 32(%rdi) | |||
mov $0x808080808101010, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
rol $14, %r10 | |||
xor %r10, 40(%rdi) | |||
mov $0x1010101010202020, %r12 | |||
pext %r12, %r11, %r10 | |||
pdep %rax, %r10, %r10 | |||
rol $14, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x2020202020004040, %rdx | |||
pext %rdx, %r11, %r10 | |||
mov $0x9248240000000000, %rbx | |||
pdep %rbx, %r10, %r10 | |||
rol $14, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 48(%rsi), %r11 | |||
mov $0x4040404040404040, %r13 | |||
pext %r13, %r11, %r10 | |||
mov $0x12492480000, %rcx | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x8080808080808080, %rbp | |||
pext %rbp, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
mov $0x101010101010101, %r14 | |||
pext %r14, %r11, %r10 | |||
mov $0x2492490000, %r15 | |||
pdep %r15, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
mov $0x202020202020202, %r9 | |||
pext %r9, %r11, %r10 | |||
pdep %r15, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
mov $0x404040404040404, %r8 | |||
pext %r8, %r11, %r10 | |||
pdep %r15, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
mov $0x808080808080808, %r12 | |||
pext %r12, %r11, %r10 | |||
pdep %r15, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
mov $0x1010101010101010, %rax | |||
pext %rax, %r11, %r10 | |||
pdep %r15, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x2020202020202020, %rdx | |||
pext %rdx, %r11, %r10 | |||
pdep %r15, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
mov 56(%rsi), %r11 | |||
mov $0x40404040404040, %rbx | |||
pext %rbx, %r11, %r10 | |||
mov $0x2492480000000000, %r13 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 0(%rdi) | |||
mov $0x80808080808080, %rbp | |||
pext %rbp, %r11, %r10 | |||
pdep %r13, %r10, %r10 | |||
xor %r10, 8(%rdi) | |||
pext %r14, %r11, %r10 | |||
mov $0x2492490000000000, %rcx | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 16(%rdi) | |||
pext %r9, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 24(%rdi) | |||
pext %r8, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 32(%rdi) | |||
pext %r12, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 40(%rdi) | |||
pext %rax, %r11, %r10 | |||
pdep %rcx, %r10, %r10 | |||
xor %r10, 48(%rdi) | |||
mov $0x20202020202020, %rdx | |||
pext %rdx, %r11, %r10 | |||
mov $0x492490000000000, %r15 | |||
pdep %r15, %r10, %r10 | |||
xor %r10, 56(%rdi) | |||
pop %rbp | |||
pop %rbx | |||
pop %r12 | |||
pop %r13 | |||
pop %r14 | |||
pop %r15 | |||
ret |
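The routine ending here is one of the deleted NTRU AVX2 bit-manipulation kernels: each step gathers a strided set of bits from the source word with `pext`, scatters them under a different mask with `pdep`, optionally rotates the group into place with `rol`, and XOR-accumulates the result into the output buffer at `(%rdi)`. A minimal sketch of that gather/scatter pattern using the BMI2 intrinsics; the masks below are illustrative only, not the constants used in the routine above.

```c
/* Minimal sketch of the pext/pdep gather-scatter pattern used above.
 * Compile with -mbmi2; the masks are illustrative, not from the routine. */
#include <immintrin.h>   /* _pext_u64 / _pdep_u64 (BMI2) */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint64_t src     = 0xA5A5A5A5A5A5A5A5ULL;
    uint64_t gather  = 0x0101010101010101ULL; /* bit 0 of every byte      */
    uint64_t scatter = 0x9249249249249249ULL; /* every third bit position */

    uint64_t packed = _pext_u64(src, gather);     /* 8 selected bits, packed low   */
    uint64_t spread = _pdep_u64(packed, scatter); /* same bits, stride-3 layout    */

    printf("%016" PRIx64 " -> %02" PRIx64 " -> %016" PRIx64 "\n",
           src, packed, spread);
    return 0;
}
```

On CPUs without fast BMI2 (notably AMD before Zen 3, where `pext`/`pdep` are microcoded), this kind of kernel is usually rewritten as shift-and-mask code instead.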
@@ -1,784 +0,0 @@ | |||
.data | |||
.p2align 5 | |||
cast8_to_16: | |||
.byte 255 | |||
.byte 0 | |||
.byte 255 | |||
.byte 1 | |||
.byte 255 | |||
.byte 2 | |||
.byte 255 | |||
.byte 3 | |||
.byte 255 | |||
.byte 4 | |||
.byte 255 | |||
.byte 5 | |||
.byte 255 | |||
.byte 6 | |||
.byte 255 | |||
.byte 7 | |||
.byte 255 | |||
.byte 0 | |||
.byte 255 | |||
.byte 1 | |||
.byte 255 | |||
.byte 2 | |||
.byte 255 | |||
.byte 3 | |||
.byte 255 | |||
.byte 4 | |||
.byte 255 | |||
.byte 5 | |||
.byte 255 | |||
.byte 6 | |||
.byte 255 | |||
.byte 7 | |||
mask_ff: | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
.word 0xff | |||
mask_f: | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
.word 0xf | |||
mask_3: | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.word 0x03 | |||
.text | |||
.global PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid | |||
.global _PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid | |||
PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid: | |||
_PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid: | |||
vmovdqa 0(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 0(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 32(%rdi) | |||
vmovdqa 32(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 64(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 96(%rdi) | |||
vmovdqa 64(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 128(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 160(%rdi) | |||
vmovdqa 96(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 192(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 224(%rdi) | |||
vmovdqa 128(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 256(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 288(%rdi) | |||
vmovdqa 160(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 320(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 352(%rdi) | |||
vmovdqa 192(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 384(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 416(%rdi) | |||
vmovdqa 224(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 448(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 480(%rdi) | |||
vmovdqa 256(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 512(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 544(%rdi) | |||
vmovdqa 288(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 576(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 608(%rdi) | |||
vmovdqa 320(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 640(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 672(%rdi) | |||
vmovdqa 352(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 704(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 736(%rdi) | |||
vmovdqa 384(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 768(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 800(%rdi) | |||
vmovdqa 416(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 832(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 864(%rdi) | |||
vmovdqa 448(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 896(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 928(%rdi) | |||
vmovdqa 480(%rsi), %ymm3 | |||
vextracti128 $0, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 960(%rdi) | |||
vextracti128 $1, %ymm3, %xmm1 | |||
vpermq $216, %ymm1, %ymm1 | |||
vpshufb cast8_to_16(%rip), %ymm1, %ymm1 | |||
vpsrlw $8, %ymm1, %ymm2 | |||
vpand mask_ff(%rip), %ymm1, %ymm1 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_f(%rip), %ymm2, %ymm1 | |||
vpsrlw $4, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpand mask_3(%rip), %ymm2, %ymm1 | |||
vpsrlw $2, %ymm2, %ymm2 | |||
vpaddw %ymm2, %ymm1, %ymm2 | |||
vpsubw mask_3(%rip), %ymm2, %ymm14 | |||
vpsraw $15, %ymm14, %ymm15 | |||
vpandn %ymm14, %ymm15, %ymm1 | |||
vpand %ymm15, %ymm2, %ymm14 | |||
vpxor %ymm14, %ymm1, %ymm2 | |||
vmovdqa %ymm2, 992(%rdi) | |||
movw $0, 1016(%rdi) | |||
movw $0, 1018(%rdi) | |||
movw $0, 1020(%rdi) | |||
movw $0, 1022(%rdi) | |||
ret |
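Functionally, the vectorized sampler above reduces each of the 508 uniform random bytes to a coefficient in {0, 1, 2} (the byte mod 3, computed branch-free through the `mask_ff`/`mask_f`/`mask_3` folds, since 256 ≡ 16 ≡ 4 ≡ 1 mod 3), and the trailing `movw $0` stores clear the padding coefficients 508..511 of the 512-entry padded output. A scalar restatement under those assumptions; the names are illustrative and `%` stands in for the branch-free reduction:

```c
/* Scalar restatement of what the AVX2 sample_iid routine above computes:
 * coeffs[i] = bytes[i] mod 3 for i < 508, padding coefficients zeroed. */
#include <stdint.h>
#include <string.h>

#define SAMPLE_IID_BYTES 508   /* NTRU_N - 1 */
#define PADDED_N         512   /* PAD32(NTRU_N) */

static void sample_iid_scalar(uint16_t coeffs[PADDED_N],
                              const uint8_t uniformbytes[SAMPLE_IID_BYTES]) {
    for (size_t i = 0; i < SAMPLE_IID_BYTES; i++) {
        coeffs[i] = uniformbytes[i] % 3;   /* the asm does this branch-free */
    }
    memset(&coeffs[SAMPLE_IID_BYTES], 0,
           (PADDED_N - SAMPLE_IID_BYTES) * sizeof(uint16_t));
}

int main(void) {
    uint8_t bytes[SAMPLE_IID_BYTES];
    uint16_t c[PADDED_N];
    for (size_t i = 0; i < SAMPLE_IID_BYTES; i++) bytes[i] = (uint8_t)(i * 7 + 1);
    sample_iid_scalar(c, bytes);
    return (c[0] == bytes[0] % 3) ? 0 : 1;   /* trivial smoke test */
}
```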
@@ -1,20 +0,0 @@ | |||
set( | |||
SRC_CLEAN_NTRUHPS2048509 | |||
cmov.c | |||
crypto_sort_int32.c | |||
kem.c | |||
owcpa.c | |||
pack3.c | |||
packq.c | |||
poly.c | |||
poly_lift.c | |||
poly_mod.c | |||
poly_r2_inv.c | |||
poly_rq_mul.c | |||
poly_s3_inv.c | |||
sample.c | |||
sample_iid.c | |||
) | |||
define_kem_alg(ntruhps2048509_clean | |||
PQCLEAN_NTRUHPS2048509_CLEAN "${SRC_CLEAN_NTRUHPS2048509}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -1,19 +0,0 @@ | |||
#ifndef PQCLEAN_NTRUHPS2048509_CLEAN_API_H | |||
#define PQCLEAN_NTRUHPS2048509_CLEAN_API_H | |||
#include <stdint.h> | |||
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_SECRETKEYBYTES 935 | |||
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_PUBLICKEYBYTES 699 | |||
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_CIPHERTEXTBYTES 699 | |||
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_BYTES 32 | |||
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_ALGNAME "ntruhps2048509" | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk); | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#include "cmov.h" | |||
/* b = 1 means mov, b = 0 means don't mov*/ | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) { | |||
size_t i; | |||
b = (~b + 1); | |||
for (i = 0; i < len; i++) { | |||
r[i] ^= b & (x[i] ^ r[i]); | |||
} | |||
} |
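`cmov` is the usual constant-time conditional overwrite: `b = ~b + 1` turns the flag 1 into the all-ones byte mask and leaves 0 unchanged, and `r[i] ^= b & (x[i] ^ r[i])` then either copies `x[i]` or leaves `r[i]` alone, with no data-dependent branch. A standalone self-check of that select (same arithmetic, not the deleted file itself):

```c
/* Self-check of the constant-time select used by cmov above:
 * mask = -b is 0x00 for b = 0 and 0xFF for b = 1, and
 * r ^ (mask & (x ^ r)) is r when mask = 0 and x when mask = 0xFF. */
#include <assert.h>

int main(void) {
    for (unsigned b = 0; b <= 1; b++) {
        unsigned char mask = (unsigned char)(~b + 1);
        assert(mask == (b ? 0xFF : 0x00));
        for (unsigned r = 0; r < 256; r++) {
            for (unsigned x = 0; x < 256; x++) {
                unsigned char out = (unsigned char)(r ^ (mask & (x ^ r)));
                assert(out == (b ? (unsigned char)x : (unsigned char)r));
            }
        }
    }
    return 0;
}
```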
@@ -1,10 +0,0 @@ | |||
#ifndef VERIFY_H | |||
#define VERIFY_H | |||
#include "params.h" | |||
#include <stddef.h> | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b); | |||
#endif |
@@ -1,86 +0,0 @@ | |||
// Based on supercop-20190110/crypto_sort/int32/x86 | |||
#include "crypto_sort_int32.h" | |||
#include <stdint.h> | |||
#define int32 int32_t | |||
#define int32_MINMAX(a,b) \ | |||
do { \ | |||
int32_t ab = (b) ^ (a); \ | |||
int32_t c = (int32_t)((int64_t)(b) - (int64_t)(a)); \ | |||
c ^= ab & (c ^ (b)); \ | |||
c >>= 31; \ | |||
c &= ab; \ | |||
(a) ^= c; \ | |||
(b) ^= c; \ | |||
} while(0) | |||
/* assume 2 <= n <= 0x40000000 */ | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_crypto_sort_int32(int32 *array, size_t n) { | |||
size_t top, p, q, r, i, j; | |||
int32 *x = array; | |||
top = 1; | |||
while (top < n - top) { | |||
top += top; | |||
} | |||
for (p = top; p >= 1; p >>= 1) { | |||
i = 0; | |||
while (i + 2 * p <= n) { | |||
for (j = i; j < i + p; ++j) { | |||
int32_MINMAX(x[j], x[j + p]); | |||
} | |||
i += 2 * p; | |||
} | |||
for (j = i; j < n - p; ++j) { | |||
int32_MINMAX(x[j], x[j + p]); | |||
} | |||
i = 0; | |||
j = 0; | |||
for (q = top; q > p; q >>= 1) { | |||
if (j != i) { | |||
for (;;) { | |||
if (j == n - q) { | |||
goto done; | |||
} | |||
int32 a = x[j + p]; | |||
for (r = q; r > p; r >>= 1) { | |||
int32_MINMAX(a, x[j + r]); | |||
} | |||
x[j + p] = a; | |||
++j; | |||
if (j == i + p) { | |||
i += 2 * p; | |||
break; | |||
} | |||
} | |||
} | |||
while (i + p <= n - q) { | |||
for (j = i; j < i + p; ++j) { | |||
int32 a = x[j + p]; | |||
for (r = q; r > p; r >>= 1) { | |||
int32_MINMAX(a, x[j + r]); | |||
} | |||
x[j + p] = a; | |||
} | |||
i += 2 * p; | |||
} | |||
/* now i + p > n - q */ | |||
j = i; | |||
while (j < n - q) { | |||
int32 a = x[j + p]; | |||
for (r = q; r > p; r >>= 1) { | |||
int32_MINMAX(a, x[j + r]); | |||
} | |||
x[j + p] = a; | |||
++j; | |||
} | |||
done: | |||
; | |||
} | |||
} | |||
} |
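The sort above is a data-independent sorting network driven entirely by `int32_MINMAX`, a branch-free compare-exchange: the difference is computed in 64 bits so it cannot overflow, its sign (corrected through `ab & (c ^ b)` after the truncation back to 32 bits) is smeared across the word, masked with `a ^ b`, and XORed into both operands, leaving `a` as the minimum and `b` as the maximum. A quick standalone check of the macro on edge cases, assuming the usual arithmetic right shift of negative values (which the original also relies on):

```c
/* Stand-alone check of the branch-free compare-exchange used by
 * crypto_sort_int32 above: after MINMAX, a holds min and b holds max. */
#include <assert.h>
#include <stdint.h>

#define int32_MINMAX(a, b)                                  \
    do {                                                    \
        int32_t ab = (b) ^ (a);                             \
        int32_t c = (int32_t)((int64_t)(b) - (int64_t)(a)); \
        c ^= ab & (c ^ (b));                                \
        c >>= 31;                                           \
        c &= ab;                                            \
        (a) ^= c;                                           \
        (b) ^= c;                                           \
    } while (0)

int main(void) {
    const int32_t vals[] = { INT32_MIN, INT32_MIN + 1, -7, -1, 0, 1, 7,
                             INT32_MAX - 1, INT32_MAX };
    const int n = (int)(sizeof vals / sizeof vals[0]);
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            int32_t a = vals[i], b = vals[j];
            int32_MINMAX(a, b);
            assert(a <= b);                              /* ordered      */
            assert((a == vals[i] && b == vals[j]) ||
                   (a == vals[j] && b == vals[i]));      /* same values  */
        }
    }
    return 0;
}
```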
@@ -1,11 +0,0 @@ | |||
#ifndef CRYPTO_SORT | |||
#define CRYPTO_SORT | |||
#include "params.h" | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_crypto_sort_int32(int32_t *array, size_t n); | |||
#endif |
@@ -1,63 +0,0 @@ | |||
#include "api.h" | |||
#include "cmov.h" | |||
#include "fips202.h" | |||
#include "owcpa.h" | |||
#include "params.h" | |||
#include "randombytes.h" | |||
#include "sample.h" | |||
// API FUNCTIONS | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
uint8_t seed[NTRU_SAMPLE_FG_BYTES]; | |||
randombytes(seed, NTRU_SAMPLE_FG_BYTES); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(pk, sk, seed); | |||
randombytes(sk + NTRU_OWCPA_SECRETKEYBYTES, NTRU_PRFKEYBYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) { | |||
poly r, m; | |||
uint8_t rm[NTRU_OWCPA_MSGBYTES]; | |||
uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES]; | |||
randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(&r, &m, rm_seed); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm, &r); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m); | |||
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(&r); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(c, &r, &m, pk); | |||
return 0; | |||
} | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk) { | |||
int i, fail; | |||
uint8_t rm[NTRU_OWCPA_MSGBYTES]; | |||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES]; | |||
fail = PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(rm, c, sk); | |||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */ | |||
/* See comment in PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec for details. */ | |||
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); | |||
/* shake(secret PRF key || input ciphertext) */ | |||
for (i = 0; i < NTRU_PRFKEYBYTES; i++) { | |||
buf[i] = sk[i + NTRU_OWCPA_SECRETKEYBYTES]; | |||
} | |||
for (i = 0; i < NTRU_CIPHERTEXTBYTES; i++) { | |||
buf[NTRU_PRFKEYBYTES + i] = c[i]; | |||
} | |||
sha3_256(rm, buf, NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_cmov(k, rm, NTRU_SHAREDKEYBYTES, (unsigned char) fail); | |||
return 0; | |||
} |
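`crypto_kem_dec` above never rejects explicitly: it always derives a candidate key from the decrypted `(r, m)`, derives a fallback key from the secret PRF key and the ciphertext, and uses `cmov` to overwrite the candidate exactly when the `owcpa_dec` checks failed, so an invalid ciphertext yields a pseudorandom key rather than an error (implicit rejection). A toy model of that control flow; `toy_hash` is a throwaway placeholder, not SHA3-256, and the buffer sizes are schematic:

```c
/* Toy model of the implicit-rejection flow in crypto_kem_dec above.
 * toy_hash is NOT a real hash; only the control flow and the
 * constant-time key selection are the point here. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

static void toy_hash(uint8_t out[32], const uint8_t *in, size_t len) {
    memset(out, 0, 32);
    for (size_t i = 0; i < len; i++) out[i % 32] ^= (uint8_t)(in[i] + (uint8_t)i);
}

static void ct_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) {
    b = (uint8_t)(~b + 1);                        /* 0x00 or 0xFF, as in cmov.c */
    for (size_t i = 0; i < len; i++) r[i] ^= b & (x[i] ^ r[i]);
}

/* Derive both keys unconditionally, then select with cmov (clen <= 64 assumed). */
static void dec_select(uint8_t k[32], const uint8_t rm[32],
                       const uint8_t prfkey[32], const uint8_t *c, size_t clen,
                       int fail) {
    uint8_t buf[32 + 64], reject[32];
    toy_hash(k, rm, 32);                          /* candidate key from (r, m)  */
    memcpy(buf, prfkey, 32);
    memcpy(buf + 32, c, clen);
    toy_hash(reject, buf, 32 + clen);             /* fallback: H(prfkey || c)   */
    ct_cmov(k, reject, 32, (uint8_t)fail);        /* keep k only if fail == 0   */
}

int main(void) {
    uint8_t rm[32] = {1}, prf[32] = {2}, c[64] = {3};
    uint8_t k_ok[32], k_bad[32], expect[32], buf[96];

    dec_select(k_ok, rm, prf, c, sizeof c, 0);
    toy_hash(expect, rm, 32);
    assert(memcmp(k_ok, expect, 32) == 0);        /* fail = 0: real key kept    */

    dec_select(k_bad, rm, prf, c, sizeof c, 1);
    memcpy(buf, prf, 32); memcpy(buf + 32, c, 64);
    toy_hash(expect, buf, 96);
    assert(memcmp(k_bad, expect, 32) == 0);       /* fail = 1: fallback key     */
    return 0;
}
```

The point of ending with `cmov` is that the choice between the two keys never shows up in timing or control flow.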
@@ -1,183 +0,0 @@ | |||
#include "owcpa.h" | |||
#include "poly.h" | |||
#include "sample.h" | |||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) { | |||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */ | |||
/* Check that any unused bits of the final byte are zero. */ | |||
uint16_t t = 0; | |||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1]; | |||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))); | |||
/* We have 0 <= t < 256 */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 15)); | |||
} | |||
static int owcpa_check_r(const poly *r) { | |||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */ | |||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */ | |||
int i; | |||
uint32_t t = 0; | |||
uint16_t c; | |||
for (i = 0; i < NTRU_N - 1; i++) { | |||
c = r->coeffs[i]; | |||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */ | |||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */ | |||
} | |||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */ | |||
/* We have 0 <= t < 2^16. */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 31)); | |||
} | |||
static int owcpa_check_m(const poly *m) { | |||
/* Check that m is in message space, i.e. */ | |||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */ | |||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */ | |||
/* Note: We may assume that m has coefficients in {0,1,2}. */ | |||
int i; | |||
uint32_t t = 0; | |||
uint16_t ps = 0; | |||
uint16_t ms = 0; | |||
for (i = 0; i < NTRU_N; i++) { | |||
ps += m->coeffs[i] & 1; | |||
ms += m->coeffs[i] & 2; | |||
} | |||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */ | |||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */ | |||
/* We have 0 <= t < 2^16. */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 31)); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk, | |||
unsigned char *sk, | |||
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) { | |||
int i; | |||
poly x1, x2, x3, x4, x5; | |||
poly *f = &x1, *g = &x2, *invf_mod3 = &x3; | |||
poly *gf = &x3, *invgf = &x4, *tmp = &x5; | |||
poly *invh = &x3, *h = &x3; | |||
PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(f, g, seed); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(invf_mod3, f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(sk, f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(sk + NTRU_PACK_TRINARY_BYTES, invf_mod3); | |||
/* Lift coeffs of f and g from Z_p to Z_q */ | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(g); | |||
/* g = 3*g */ | |||
for (i = 0; i < NTRU_N; i++) { | |||
g->coeffs[i] = 3 * g->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(gf, g, f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(invgf, gf); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invgf, f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(invh, tmp, f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invgf, g); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(h, tmp, g); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(pk, h); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(unsigned char *c, | |||
const poly *r, | |||
const poly *m, | |||
const unsigned char *pk) { | |||
int i; | |||
poly x1, x2; | |||
poly *h = &x1, *liftm = &x1; | |||
poly *ct = &x2; | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(h, pk); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(ct, r, h); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(liftm, m); | |||
for (i = 0; i < NTRU_N; i++) { | |||
ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(c, ct); | |||
} | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(unsigned char *rm, | |||
const unsigned char *ciphertext, | |||
const unsigned char *secretkey) { | |||
int i; | |||
int fail; | |||
poly x1, x2, x3, x4; | |||
poly *c = &x1, *f = &x2, *cf = &x3; | |||
poly *mf = &x2, *finv3 = &x3, *m = &x4; | |||
poly *liftm = &x2, *invh = &x3, *r = &x4; | |||
poly *b = &x1; | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(c, ciphertext); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(f, secretkey); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(cf, c, f); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(mf, cf); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(finv3, secretkey + NTRU_PACK_TRINARY_BYTES); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(m, mf, finv3); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m); | |||
fail = 0; | |||
/* Check that the unused bits of the last byte of the ciphertext are zero */ | |||
fail |= owcpa_check_ciphertext(ciphertext); | |||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */ | |||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */ | |||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */ | |||
/* (m can take any value in S3 in NTRU_HRSS) */ | |||
fail |= owcpa_check_m(m); | |||
/* b = c - Lift(m) mod (q, x^n - 1) */ | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(liftm, m); | |||
for (i = 0; i < NTRU_N; i++) { | |||
b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i]; | |||
} | |||
/* r = b / h mod (q, Phi_n) */ | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(invh, secretkey + 2 * NTRU_PACK_TRINARY_BYTES); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(r, b, invh); | |||
/* NOTE: Our definition of r as b/h mod (q, Phi_n) follows Figure 4 of */ | |||
/* [Sch18] https://eprint.iacr.org/2018/1174/20181203:032458. */ | |||
/* This differs from Figure 10 of Saito--Xagawa--Yamakawa */ | |||
/* [SXY17] https://eprint.iacr.org/2017/1005/20180516:055500 */ | |||
/* where r gets a final reduction modulo p. */ | |||
/* We need this change to use Proposition 1 of [Sch18]. */ | |||
/* Proposition 1 of [Sch18] shows that re-encryption with (r,m) yields c. */ | |||
/* if and only if fail==0 after the following call to owcpa_check_r */ | |||
/* The procedure given in Fig. 8 of [Sch18] can be skipped because we have */ | |||
/* c(1) = 0 due to the use of poly_Rq_sum_zero_{to,from}bytes. */ | |||
fail |= owcpa_check_r(r); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_trinary_Zq_to_Z3(r); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm, r); | |||
return fail; | |||
} |
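All of the `owcpa_check_*` helpers above fold their findings into one unsigned accumulator `t` and return `1 & ((~t + 1) >> 31)` (or `>> 15` for the byte-sized check), a branch-free "t is nonzero" predicate: `~t + 1` is `-t` modulo the word size, and its top bit is set exactly when `0 < t < 2^31`, which the surrounding comments guarantee. A small standalone check of the predicate, and of why that bound matters:

```c
/* Check of the branch-free nonzero test used by the owcpa_check_* helpers:
 * for an unsigned 32-bit t with t < 2^31 (here t < 2^16 by construction),
 * 1 & ((~t + 1) >> 31) is 1 exactly when t != 0. */
#include <assert.h>
#include <stdint.h>

static int is_nonzero_bit(uint32_t t) {
    return (int)(1 & ((~t + 1) >> 31));
}

int main(void) {
    for (uint32_t t = 0; t < (1u << 16); t++) {
        assert(is_nonzero_bit(t) == (t != 0));
    }
    /* The precondition matters: for t >= 2^31 the top bit of -t is 0 again. */
    assert(is_nonzero_bit(0x80000001u) == 0);
    return 0;
}
```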
@@ -1,19 +0,0 @@ | |||
#ifndef OWCPA_H | |||
#define OWCPA_H | |||
#include "params.h" | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk, | |||
unsigned char *sk, | |||
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(unsigned char *c, | |||
const poly *r, | |||
const poly *m, | |||
const unsigned char *pk); | |||
int PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(unsigned char *rm, | |||
const unsigned char *ciphertext, | |||
const unsigned char *secretkey); | |||
#endif |
@@ -1,46 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_MSGBYTES], const poly *a) { | |||
int i; | |||
unsigned char c; | |||
int j; | |||
for (i = 0; i < NTRU_PACK_DEG / 5; i++) { | |||
c = a->coeffs[5 * i + 4] & 255; | |||
c = (3 * c + a->coeffs[5 * i + 3]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 2]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 1]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 0]) & 255; | |||
msg[i] = c; | |||
} | |||
i = NTRU_PACK_DEG / 5; | |||
c = 0; | |||
for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) { | |||
c = (3 * c + a->coeffs[5 * i + j]) & 255; | |||
} | |||
msg[i] = c; | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_OWCPA_MSGBYTES]) { | |||
int i; | |||
unsigned char c; | |||
int j; | |||
for (i = 0; i < NTRU_PACK_DEG / 5; i++) { | |||
c = msg[i]; | |||
r->coeffs[5 * i + 0] = c; | |||
r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3 | |||
r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2 | |||
r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3 | |||
r->coeffs[5 * i + 4] = c * 203 >> 14; // etc. | |||
} | |||
i = NTRU_PACK_DEG / 5; | |||
c = msg[i]; | |||
for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) { | |||
r->coeffs[5 * i + j] = c; | |||
c = c * 171 >> 9; | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r); | |||
} | |||
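The S3 coders above pack five ternary coefficients per byte in base 3 (3^5 = 243 ≤ 256) and unpack them with multiply-and-shift in place of division: on the byte range, `c*171 >> 9` equals `c/3`, `c*57 >> 9` equals `c/9`, `c*19 >> 9` equals `c/27`, and `c*203 >> 14` equals `c/81`; the final `poly_mod_3_Phi_n` call then reduces each quotient mod 3. A standalone check of both facts:

```c
/* Round-trip check of the base-3 packing above: five trits per byte,
 * with the multiply-and-shift "divisions by 3^k" used by frombytes. */
#include <assert.h>
#include <stdint.h>

int main(void) {
    /* The shift-based quotients agree with real division on all bytes. */
    for (uint32_t c = 0; c < 256; c++) {
        assert((c * 171 >>  9) == c / 3);
        assert((c *  57 >>  9) == c / 9);
        assert((c *  19 >>  9) == c / 27);
        assert((c * 203 >> 14) == c / 81);
    }
    /* Pack (Horner in base 3, as in poly_S3_tobytes) and unpack all 3^5 cases. */
    for (uint8_t t0 = 0; t0 < 3; t0++)
    for (uint8_t t1 = 0; t1 < 3; t1++)
    for (uint8_t t2 = 0; t2 < 3; t2++)
    for (uint8_t t3 = 0; t3 < 3; t3++)
    for (uint8_t t4 = 0; t4 < 3; t4++) {
        uint8_t c = (uint8_t)(((((t4 * 3 + t3) * 3 + t2) * 3 + t1) * 3 + t0) & 255);
        assert(( c              % 3) == t0);   /* coeff 5i+0 after mod 3 */
        assert(((c * 171 >>  9) % 3) == t1);   /* coeff 5i+1 */
        assert(((c *  57 >>  9) % 3) == t2);   /* coeff 5i+2 */
        assert(((c *  19 >>  9) % 3) == t3);   /* coeff 5i+3 */
        assert(((c * 203 >> 14) % 3) == t4);   /* coeff 5i+4 */
    }
    return 0;
}
```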
@@ -1,93 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a) { | |||
int i, j; | |||
uint16_t t[8]; | |||
for (i = 0; i < NTRU_PACK_DEG / 8; i++) { | |||
for (j = 0; j < 8; j++) { | |||
t[j] = MODQ(a->coeffs[8 * i + j]); | |||
} | |||
r[11 * i + 0] = (unsigned char) ( t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3)); | |||
r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6)); | |||
r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff); | |||
r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1)); | |||
r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4)); | |||
r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7)); | |||
r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff); | |||
r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2)); | |||
r[11 * i + 9] = (unsigned char) ((t[6] >> 6) | ((t[7] & 0x07) << 5)); | |||
r[11 * i + 10] = (unsigned char) ((t[7] >> 3)); | |||
} | |||
for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) { | |||
t[j] = MODQ(a->coeffs[8 * i + j]); | |||
} | |||
for (; j < 8; j++) { | |||
t[j] = 0; | |||
} | |||
switch (NTRU_PACK_DEG & 0x07) { | |||
// cases 0 and 6 are impossible since 2 generates (Z/n)* and | |||
// p mod 8 in {1, 7} implies that 2 is a quadratic residue. | |||
case 4: | |||
r[11 * i + 0] = (unsigned char) (t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); | |||
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); | |||
r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff; | |||
r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1); | |||
r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4); | |||
break; | |||
case 2: | |||
r[11 * i + 0] = (unsigned char) (t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); | |||
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); | |||
break; | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a) { | |||
int i; | |||
for (i = 0; i < NTRU_PACK_DEG / 8; i++) { | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); | |||
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); | |||
r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4); | |||
r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9); | |||
r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6); | |||
r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3); | |||
} | |||
switch (NTRU_PACK_DEG & 0x07) { | |||
// cases 0 and 6 are impossible since 2 generates (Z/n)* and | |||
// p mod 8 in {1, 7} implies that 2 is a quadratic residue. | |||
case 4: | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); | |||
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); | |||
break; | |||
case 2: | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
break; | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) { | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(r, a); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a) { | |||
int i; | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(r, a); | |||
/* Set r[n-1] so that the sum of coefficients is zero mod q */ | |||
r->coeffs[NTRU_N - 1] = 0; | |||
for (i = 0; i < NTRU_PACK_DEG; i++) { | |||
r->coeffs[NTRU_N - 1] -= r->coeffs[i]; | |||
} | |||
} |
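`poly_Sq_tobytes`/`poly_Sq_frombytes` above are an unrolled LSB-first bit packer: coefficient k, reduced to 11 bits by `MODQ`, occupies bit positions [11k, 11k + 11) of the byte stream, so eight coefficients fill exactly 11 bytes. A loop-based sketch of that layout (my own packer, assumed equivalent to the unrolled expressions above, with two of those expressions cross-checked):

```c
/* Generic LSB-first 11-bit packer matching the layout of Sq_tobytes above. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

static void pack11(uint8_t *out, const uint16_t *in, size_t n) {
    memset(out, 0, (11 * n + 7) / 8);
    for (size_t k = 0; k < n; k++) {
        for (size_t b = 0; b < 11; b++) {
            size_t bit = 11 * k + b;
            out[bit / 8] |= (uint8_t)(((in[k] >> b) & 1) << (bit % 8));
        }
    }
}

static uint16_t get11(const uint8_t *in, size_t k) {
    uint16_t v = 0;
    for (size_t b = 0; b < 11; b++) {
        size_t bit = 11 * k + b;
        v |= (uint16_t)(((in[bit / 8] >> (bit % 8)) & 1) << b);
    }
    return v;
}

int main(void) {
    uint16_t coeffs[8];
    uint8_t bytes[11];
    for (size_t k = 0; k < 8; k++) coeffs[k] = (uint16_t)((k * 737 + 5) & 0x7ff);
    pack11(bytes, coeffs, 8);
    for (size_t k = 0; k < 8; k++) assert(get11(bytes, k) == coeffs[k]);
    /* Cross-check two of the unrolled expressions from Sq_tobytes above. */
    assert(bytes[1] == (uint8_t)((coeffs[0] >> 8) | ((coeffs[1] & 0x1f) << 3)));
    assert(bytes[2] == (uint8_t)((coeffs[1] >> 5) | ((coeffs[2] & 0x03) << 6)));
    return 0;
}
```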
@@ -1,37 +0,0 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#define NTRU_HPS | |||
#define NTRU_N 509 | |||
#define NTRU_LOGQ 11 | |||
/* Do not modify below this line */ | |||
#define PAD32(X) ((((X) + 31)/32)*32) | |||
#define NTRU_Q (1 << NTRU_LOGQ) | |||
#define NTRU_WEIGHT (NTRU_Q/8 - 2) | |||
#define NTRU_SEEDBYTES 32 | |||
#define NTRU_PRFKEYBYTES 32 | |||
#define NTRU_SHAREDKEYBYTES 32 | |||
#define NTRU_SAMPLE_IID_BYTES (NTRU_N-1) | |||
#define NTRU_SAMPLE_FT_BYTES ((30*(NTRU_N-1)+7)/8) | |||
#define NTRU_SAMPLE_FG_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES) | |||
#define NTRU_SAMPLE_RM_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES) | |||
#define NTRU_PACK_DEG (NTRU_N-1) | |||
#define NTRU_PACK_TRINARY_BYTES ((NTRU_PACK_DEG+4)/5) | |||
#define NTRU_OWCPA_MSGBYTES (2*NTRU_PACK_TRINARY_BYTES) | |||
#define NTRU_OWCPA_PUBLICKEYBYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8) | |||
#define NTRU_OWCPA_SECRETKEYBYTES (2*NTRU_PACK_TRINARY_BYTES + NTRU_OWCPA_PUBLICKEYBYTES) | |||
#define NTRU_OWCPA_BYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8) | |||
#define NTRU_PUBLICKEYBYTES (NTRU_OWCPA_PUBLICKEYBYTES) | |||
#define NTRU_SECRETKEYBYTES (NTRU_OWCPA_SECRETKEYBYTES + NTRU_PRFKEYBYTES) | |||
#define NTRU_CIPHERTEXTBYTES (NTRU_OWCPA_BYTES) | |||
#endif |
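Plugging NTRU_N = 509 and NTRU_LOGQ = 11 into these formulas reproduces the constants in the deleted `api.h`: ⌈508/5⌉ = 102 trinary-packed bytes, a ⌈11·508/8⌉ = 699-byte public key and ciphertext, and a 2·102 + 699 + 32 = 935-byte secret key. A standalone recomputation:

```c
/* Recomputation of the api.h constants from the parameter formulas above
 * (NTRU_N = 509, NTRU_LOGQ = 11, i.e. ntruhps2048509). */
#include <assert.h>

int main(void) {
    const int n = 509, logq = 11;
    const int pack_deg = n - 1;                       /* NTRU_PACK_DEG          */
    const int trinary_bytes = (pack_deg + 4) / 5;     /* 5 trits per byte       */
    const int owcpa_pk = (logq * pack_deg + 7) / 8;   /* 11-bit coefficients    */
    const int owcpa_sk = 2 * trinary_bytes + owcpa_pk;
    const int prfkey_bytes = 32;                      /* NTRU_PRFKEYBYTES       */

    assert(trinary_bytes == 102);
    assert(owcpa_pk == 699);               /* CRYPTO_PUBLICKEYBYTES = CIPHERTEXTBYTES */
    assert(owcpa_sk + prfkey_bytes == 935); /* CRYPTO_SECRETKEYBYTES                  */
    return 0;
}
```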
@@ -1,75 +0,0 @@ | |||
#include "poly.h" | |||
/* Map {0, 1, 2} -> {0,1,q-1} in place */ | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = r->coeffs[i] | ((-(r->coeffs[i] >> 1)) & (NTRU_Q - 1)); | |||
} | |||
} | |||
/* Map {0, 1, q-1} -> {0,1,2} in place */ | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_trinary_Zq_to_Z3(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = MODQ(r->coeffs[i]); | |||
r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1))); | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b) { | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, a, b); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(r); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b) { | |||
int i; | |||
/* Our S3 multiplications do not overflow mod q, */ | |||
/* so we can re-purpose PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul, as long as we */ | |||
/* follow with an explicit reduction mod q. */ | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, a, b); | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = MODQ(r->coeffs[i]); | |||
} | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r); | |||
} | |||
static void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { | |||
int i; | |||
poly b, c; | |||
poly s; | |||
// for 0..4 | |||
// ai = ai * (2 - a*ai) mod q | |||
for (i = 0; i < NTRU_N; i++) { | |||
b.coeffs[i] = -(a->coeffs[i]); | |||
} | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = ai->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, r, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*ai | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&s, &c, r); // s = ai*c | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, &s, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*s | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, &c, &s); // r = s*c | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, r, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*r | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&s, &c, r); // s = r*c | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, &s, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*s | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, &c, &s); // r = s*c | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(poly *r, const poly *a) { | |||
poly ai2; | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(&ai2, a); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv_to_Rq_inv(r, &ai2, a); | |||
} |
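`poly_R2_inv_to_Rq_inv` lifts an inverse modulo 2 to an inverse modulo q = 2^11 by Newton iteration on x ↦ x(2 − ax): if ax ≡ 1 (mod 2^k), the update gives ax′ ≡ 1 (mod 2^{2k}), so the four unrolled rounds above reach 2^16 ≥ 2^11. The same iteration on scalars, as a sanity check of the lifting argument (standalone, not the polynomial code):

```c
/* Scalar model of the Newton lift in poly_R2_inv_to_Rq_inv above:
 * start from the inverse mod 2 (which is 1 for any odd a) and square
 * the precision with x <- x*(2 - a*x) until it covers q = 2048. */
#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint64_t q = 2048;                    /* NTRU_Q = 2^11 */
    for (uint64_t a = 1; a < q; a += 2) {       /* every odd a is a unit mod 2^k */
        uint64_t x = 1;                         /* a*x == 1 (mod 2) */
        for (int round = 0; round < 4; round++) {
            x = (x * (2 - a * x)) & (q - 1);    /* wrap-around is harmless: 2048 | 2^64 */
        }
        assert(((a * x) & (q - 1)) == 1);       /* full inverse mod 2048 after 4 rounds */
    }
    return 0;
}
```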
@@ -1,39 +0,0 @@ | |||
#ifndef POLY_H | |||
#define POLY_H | |||
#include "params.h" | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#define MODQ(X) ((X) & (NTRU_Q-1)) | |||
typedef struct { | |||
uint16_t coeffs[NTRU_N]; | |||
} poly; | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(poly *r); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(poly *r); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(unsigned char msg[NTRU_PACK_TRINARY_BYTES], const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_PACK_TRINARY_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(poly *r); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_trinary_Zq_to_Z3(poly *r); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(poly *r, const poly *a) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = a->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(r); | |||
} | |||
@@ -1,53 +0,0 @@ | |||
#include "poly.h" | |||
static uint16_t mod3(uint16_t a) { | |||
uint16_t r; | |||
int16_t t, c; | |||
r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 | |||
r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 | |||
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 | |||
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 | |||
t = r - 3; | |||
c = t >> 15; | |||
return (c & r) ^ (~c & t); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]); | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = r->coeffs[i] - r->coeffs[NTRU_N - 1]; | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) { | |||
int i; | |||
uint16_t flag; | |||
/* The coefficients of a are stored as non-negative integers. */ | |||
/* We must translate to representatives in [-q/2, q/2) before */ | |||
/* reduction mod 3. */ | |||
for (i = 0; i < NTRU_N; i++) { | |||
/* Need an explicit reduction mod q here */ | |||
r->coeffs[i] = MODQ(a->coeffs[i]); | |||
/* flag = 1 if r[i] >= q/2 else 0 */ | |||
flag = r->coeffs[i] >> (NTRU_LOGQ - 1); | |||
/* Now we will add (-q) mod 3 if r[i] >= q/2 */ | |||
/* Note (-q) mod 3=(-2^k) mod 3=1<<(1-(k&1)) */ | |||
r->coeffs[i] += flag << (1 - (NTRU_LOGQ & 1)); | |||
} | |||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r); | |||
} | |||
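`mod3` avoids division by folding with 256 ≡ 16 ≡ 4 ≡ 1 (mod 3) until the value is at most 5, then maps {3, 4, 5} down with a masked select; and `poly_Rq_to_S3` centers each residue before reducing, using (−q) mod 3 = (−2^11) mod 3 = 1 = 1 << (1 − (11 & 1)). Both facts are cheap to confirm exhaustively (the `mod3` body below is copied from the file above):

```c
/* Two standalone checks of poly_mod.c above: mod3() agrees with % 3 on
 * every 16-bit input, and the centering step in poly_Rq_to_S3 adds the
 * right constant for q = 2048. */
#include <assert.h>
#include <stdint.h>

static uint16_t mod3(uint16_t a) {             /* same body as above */
    uint16_t r;
    int16_t t, c;
    r = (a >> 8) + (a & 0xff);                 /* 256 == 1 (mod 3)   */
    r = (r >> 4) + (r & 0xf);                  /* 16  == 1 (mod 3)   */
    r = (r >> 2) + (r & 0x3);                  /* 4   == 1 (mod 3)   */
    r = (r >> 2) + (r & 0x3);
    t = r - 3;
    c = t >> 15;
    return (c & r) ^ (~c & t);
}

int main(void) {
    for (uint32_t a = 0; a < 65536; a++) {
        assert(mod3((uint16_t)a) == a % 3);
    }
    /* Centered representative v' = v - q for v >= q/2 satisfies v' = v + 1 (mod 3). */
    for (int32_t v = 0; v < 2048; v++) {
        int flag = v >> 10;                    /* 1 iff v >= q/2 */
        int32_t centered = v - flag * 2048;    /* in [-1024, 1024) */
        assert(((centered % 3) + 3) % 3 == (v + flag * (1 << (1 - (11 & 1)))) % 3);
    }
    return 0;
}
```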
@@ -1,69 +0,0 @@ | |||
/* Based on supercop-20200702/crypto_core/invhrss701/simpler/core.c */ | |||
#include "poly.h" | |||
/* return -1 if x<0 and y<0; otherwise return 0 */ | |||
static inline int16_t both_negative_mask(int16_t x, int16_t y) { | |||
return (x & y) >> 15; | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a) { | |||
poly f, g, v, w; | |||
size_t i, loop; | |||
int16_t delta, sign, swap, t; | |||
for (i = 0; i < NTRU_N; ++i) { | |||
v.coeffs[i] = 0; | |||
} | |||
for (i = 0; i < NTRU_N; ++i) { | |||
w.coeffs[i] = 0; | |||
} | |||
w.coeffs[0] = 1; | |||
for (i = 0; i < NTRU_N; ++i) { | |||
f.coeffs[i] = 1; | |||
} | |||
for (i = 0; i < NTRU_N - 1; ++i) { | |||
g.coeffs[NTRU_N - 2 - i] = (a->coeffs[i] ^ a->coeffs[NTRU_N - 1]) & 1; | |||
} | |||
g.coeffs[NTRU_N - 1] = 0; | |||
delta = 1; | |||
for (loop = 0; loop < 2 * (NTRU_N - 1) - 1; ++loop) { | |||
for (i = NTRU_N - 1; i > 0; --i) { | |||
v.coeffs[i] = v.coeffs[i - 1]; | |||
} | |||
v.coeffs[0] = 0; | |||
sign = g.coeffs[0] & f.coeffs[0]; | |||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]); | |||
delta ^= swap & (delta ^ -delta); | |||
delta += 1; | |||
for (i = 0; i < NTRU_N; ++i) { | |||
t = swap & (f.coeffs[i] ^ g.coeffs[i]); | |||
f.coeffs[i] ^= t; | |||
g.coeffs[i] ^= t; | |||
t = swap & (v.coeffs[i] ^ w.coeffs[i]); | |||
v.coeffs[i] ^= t; | |||
w.coeffs[i] ^= t; | |||
} | |||
for (i = 0; i < NTRU_N; ++i) { | |||
g.coeffs[i] = g.coeffs[i] ^ (sign & f.coeffs[i]); | |||
} | |||
for (i = 0; i < NTRU_N; ++i) { | |||
w.coeffs[i] = w.coeffs[i] ^ (sign & v.coeffs[i]); | |||
} | |||
for (i = 0; i < NTRU_N - 1; ++i) { | |||
g.coeffs[i] = g.coeffs[i + 1]; | |||
} | |||
g.coeffs[NTRU_N - 1] = 0; | |||
} | |||
for (i = 0; i < NTRU_N - 1; ++i) { | |||
r->coeffs[i] = v.coeffs[NTRU_N - 2 - i]; | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} |
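The inversion above is a constant-time eliminate-and-swap loop in the style of Bernstein–Yang's safegcd; its only data-dependent decisions are taken through masks. `both_negative_mask(-delta, -g[0])` produces an all-ones mask exactly when `delta > 0` and the leading bit of `g` is 1, and the masked update turns `delta` into `1 − delta` on a swap and `delta + 1` otherwise. A standalone check of those two mask tricks, assuming the usual arithmetic right shift of negative values (as the original does):

```c
/* Check of the branch-free controls in the divstep-style loop above. */
#include <assert.h>
#include <stdint.h>

static int16_t both_negative_mask(int16_t x, int16_t y) {
    return (x & y) >> 15;          /* arithmetic shift, as in the file above */
}

int main(void) {
    assert(both_negative_mask(-1, -5) == -1);  /* both negative -> all ones  */
    assert(both_negative_mask(-1,  5) ==  0);
    assert(both_negative_mask( 1, -5) ==  0);
    assert(both_negative_mask( 1,  5) ==  0);

    for (int16_t delta = -10; delta <= 10; delta++) {
        for (int s = 0; s <= 1; s++) {
            int16_t swap = (int16_t)(s ? -1 : 0);
            int16_t d = delta;
            d ^= swap & (d ^ -d);              /* conditional negation       */
            d += 1;
            assert(d == (s ? (int16_t)(1 - delta) : (int16_t)(delta + 1)));
        }
    }
    return 0;
}
```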
@@ -1,284 +0,0 @@ | |||
#include "poly.h" | |||
/* Polynomial multiplication using */ | |||
/* Toom-4 and two layers of Karatsuba. */ | |||
#define L PAD32(NTRU_N) | |||
#define M (L/4) | |||
#define K (L/16) | |||
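/* For NTRU_N = 509: PAD32(509) = 512, so L = 512, M = 128, K = 32. */ | |||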
static void toom4_k2x2_mul(uint16_t ab[2 * L], const uint16_t a[L], const uint16_t b[L]); | |||
static void toom4_k2x2_eval_0(uint16_t r[9 * K], const uint16_t a[M]); | |||
static void toom4_k2x2_eval_p1(uint16_t r[9 * K], const uint16_t a[M]); | |||
static void toom4_k2x2_eval_m1(uint16_t r[9 * K], const uint16_t a[M]); | |||
static void toom4_k2x2_eval_p2(uint16_t r[9 * K], const uint16_t a[M]); | |||
static void toom4_k2x2_eval_m2(uint16_t r[9 * K], const uint16_t a[M]); | |||
static void toom4_k2x2_eval_p3(uint16_t r[9 * K], const uint16_t a[M]); | |||
static void toom4_k2x2_eval_inf(uint16_t r[9 * K], const uint16_t a[M]); | |||
static inline void k2x2_eval(uint16_t r[9 * K]); | |||
static void toom4_k2x2_basemul(uint16_t r[18 * K], const uint16_t a[9 * K], const uint16_t b[9 * K]); | |||
static inline void schoolbook_KxK(uint16_t r[2 * K], const uint16_t a[K], const uint16_t b[K]); | |||
static void toom4_k2x2_interpolate(uint16_t r[2 * M], const uint16_t a[63 * 2 * K]); | |||
static inline void k2x2_interpolate(uint16_t r[M], const uint16_t a[9 * K]); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) { | |||
size_t i; | |||
uint16_t ab[2 * L]; | |||
for (i = 0; i < NTRU_N; i++) { | |||
ab[i] = a->coeffs[i]; | |||
ab[L + i] = b->coeffs[i]; | |||
} | |||
for (i = NTRU_N; i < L; i++) { | |||
ab[i] = 0; | |||
ab[L + i] = 0; | |||
} | |||
toom4_k2x2_mul(ab, ab, ab + L); | |||
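    /* Reduce the 2L-coefficient product mod x^NTRU_N - 1 by folding the upper half back (x^NTRU_N == 1). */ | |||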
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = ab[i] + ab[NTRU_N + i]; | |||
} | |||
} | |||
static void toom4_k2x2_mul(uint16_t ab[2 * L], const uint16_t a[L], const uint16_t b[L]) { | |||
uint16_t tmpA[9 * K]; | |||
uint16_t tmpB[9 * K]; | |||
uint16_t eC[63 * 2 * K]; | |||
toom4_k2x2_eval_0(tmpA, a); | |||
toom4_k2x2_eval_0(tmpB, b); | |||
toom4_k2x2_basemul(eC + 0 * 9 * 2 * K, tmpA, tmpB); | |||
toom4_k2x2_eval_p1(tmpA, a); | |||
toom4_k2x2_eval_p1(tmpB, b); | |||
toom4_k2x2_basemul(eC + 1 * 9 * 2 * K, tmpA, tmpB); | |||
toom4_k2x2_eval_m1(tmpA, a); | |||
toom4_k2x2_eval_m1(tmpB, b); | |||
toom4_k2x2_basemul(eC + 2 * 9 * 2 * K, tmpA, tmpB); | |||
toom4_k2x2_eval_p2(tmpA, a); | |||
toom4_k2x2_eval_p2(tmpB, b); | |||
toom4_k2x2_basemul(eC + 3 * 9 * 2 * K, tmpA, tmpB); | |||
toom4_k2x2_eval_m2(tmpA, a); | |||
toom4_k2x2_eval_m2(tmpB, b); | |||
toom4_k2x2_basemul(eC + 4 * 9 * 2 * K, tmpA, tmpB); | |||
toom4_k2x2_eval_p3(tmpA, a); | |||
toom4_k2x2_eval_p3(tmpB, b); | |||
toom4_k2x2_basemul(eC + 5 * 9 * 2 * K, tmpA, tmpB); | |||
toom4_k2x2_eval_inf(tmpA, a); | |||
toom4_k2x2_eval_inf(tmpB, b); | |||
toom4_k2x2_basemul(eC + 6 * 9 * 2 * K, tmpA, tmpB); | |||
toom4_k2x2_interpolate(ab, eC); | |||
} | |||
static void toom4_k2x2_eval_0(uint16_t r[9 * K], const uint16_t a[M]) { | |||
for (size_t i = 0; i < M; i++) { | |||
r[i] = a[i]; | |||
} | |||
k2x2_eval(r); | |||
} | |||
static void toom4_k2x2_eval_p1(uint16_t r[9 * K], const uint16_t a[M]) { | |||
for (size_t i = 0; i < M; i++) { | |||
r[i] = a[0 * M + i]; | |||
r[i] += a[1 * M + i]; | |||
r[i] += a[2 * M + i]; | |||
r[i] += a[3 * M + i]; | |||
} | |||
k2x2_eval(r); | |||
} | |||
static void toom4_k2x2_eval_m1(uint16_t r[9 * K], const uint16_t a[M]) { | |||
for (size_t i = 0; i < M; i++) { | |||
r[i] = a[0 * M + i]; | |||
r[i] -= a[1 * M + i]; | |||
r[i] += a[2 * M + i]; | |||
r[i] -= a[3 * M + i]; | |||
} | |||
k2x2_eval(r); | |||
} | |||
static void toom4_k2x2_eval_p2(uint16_t r[9 * K], const uint16_t a[M]) { | |||
for (size_t i = 0; i < M; i++) { | |||
r[i] = a[0 * M + i]; | |||
r[i] += 2 * a[1 * M + i]; | |||
r[i] += 4 * a[2 * M + i]; | |||
r[i] += 8 * a[3 * M + i]; | |||
} | |||
k2x2_eval(r); | |||
} | |||
static void toom4_k2x2_eval_m2(uint16_t r[9 * K], const uint16_t a[M]) { | |||
for (size_t i = 0; i < M; i++) { | |||
r[i] = a[0 * M + i]; | |||
r[i] -= 2 * a[1 * M + i]; | |||
r[i] += 4 * a[2 * M + i]; | |||
r[i] -= 8 * a[3 * M + i]; | |||
} | |||
k2x2_eval(r); | |||
} | |||
static void toom4_k2x2_eval_p3(uint16_t r[9 * K], const uint16_t a[M]) { | |||
for (size_t i = 0; i < M; i++) { | |||
r[i] = a[0 * M + i]; | |||
r[i] += 3 * a[1 * M + i]; | |||
r[i] += 9 * a[2 * M + i]; | |||
r[i] += 27 * a[3 * M + i]; | |||
} | |||
k2x2_eval(r); | |||
} | |||
static void toom4_k2x2_eval_inf(uint16_t r[9 * K], const uint16_t a[M]) { | |||
for (size_t i = 0; i < M; i++) { | |||
r[i] = a[3 * M + i]; | |||
} | |||
k2x2_eval(r); | |||
} | |||
static inline void k2x2_eval(uint16_t r[9 * K]) { | |||
/* Input: e + f.Y + g.Y^2 + h.Y^3 */ | |||
/* Output: [ e | f | g | h | e+f | f+h | g+e | h+g | e+f+g+h ] */ | |||
size_t i; | |||
for (i = 0; i < 4 * K; i++) { | |||
r[4 * K + i] = r[i]; | |||
} | |||
for (i = 0; i < K; i++) { | |||
r[4 * K + i] += r[1 * K + i]; | |||
r[5 * K + i] += r[3 * K + i]; | |||
r[6 * K + i] += r[0 * K + i]; | |||
r[7 * K + i] += r[2 * K + i]; | |||
r[8 * K + i] = r[5 * K + i]; | |||
r[8 * K + i] += r[6 * K + i]; | |||
} | |||
} | |||
static void toom4_k2x2_basemul(uint16_t r[18 * K], const uint16_t a[9 * K], const uint16_t b[9 * K]) { | |||
schoolbook_KxK(r + 0 * 2 * K, a + 0 * K, b + 0 * K); | |||
schoolbook_KxK(r + 1 * 2 * K, a + 1 * K, b + 1 * K); | |||
schoolbook_KxK(r + 2 * 2 * K, a + 2 * K, b + 2 * K); | |||
schoolbook_KxK(r + 3 * 2 * K, a + 3 * K, b + 3 * K); | |||
schoolbook_KxK(r + 4 * 2 * K, a + 4 * K, b + 4 * K); | |||
schoolbook_KxK(r + 5 * 2 * K, a + 5 * K, b + 5 * K); | |||
schoolbook_KxK(r + 6 * 2 * K, a + 6 * K, b + 6 * K); | |||
schoolbook_KxK(r + 7 * 2 * K, a + 7 * K, b + 7 * K); | |||
schoolbook_KxK(r + 8 * 2 * K, a + 8 * K, b + 8 * K); | |||
} | |||
static inline void schoolbook_KxK(uint16_t r[2 * K], const uint16_t a[K], const uint16_t b[K]) { | |||
size_t i, j; | |||
for (j = 0; j < K; j++) { | |||
r[j] = a[0] * (uint32_t)b[j]; | |||
} | |||
for (i = 1; i < K; i++) { | |||
for (j = 0; j < K - 1; j++) { | |||
r[i + j] += a[i] * (uint32_t)b[j]; | |||
} | |||
r[i + K - 1] = a[i] * (uint32_t)b[K - 1]; | |||
} | |||
r[2 * K - 1] = 0; | |||
} | |||
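/* In the interpolation below, the constants 43691 and 52429 are the        */ | |||
/* inverses of 3 and 5 modulo 2^16 (3*43691 = 2^17 + 1, 5*52429 = 2^18 + 1); */ | |||
/* multiplying by them acts as division by 3 and by 5 in uint16_t, and the   */ | |||
/* result is correct mod q = 2^NTRU_LOGQ because q divides 2^16.             */ | |||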
static void toom4_k2x2_interpolate(uint16_t r[2 * M], const uint16_t a[7 * 18 * K]) { | |||
size_t i; | |||
uint16_t P1[2 * M]; | |||
uint16_t Pm1[2 * M]; | |||
uint16_t P2[2 * M]; | |||
uint16_t Pm2[2 * M]; | |||
uint16_t *C0 = r; | |||
uint16_t *C2 = r + 2 * M; | |||
uint16_t *C4 = r + 4 * M; | |||
uint16_t *C6 = r + 6 * M; | |||
uint16_t V0, V1, V2; | |||
k2x2_interpolate(C0, a + 0 * 9 * 2 * K); | |||
k2x2_interpolate(P1, a + 1 * 9 * 2 * K); | |||
k2x2_interpolate(Pm1, a + 2 * 9 * 2 * K); | |||
k2x2_interpolate(P2, a + 3 * 9 * 2 * K); | |||
k2x2_interpolate(Pm2, a + 4 * 9 * 2 * K); | |||
k2x2_interpolate(C6, a + 6 * 9 * 2 * K); | |||
for (i = 0; i < 2 * M; i++) { | |||
V0 = ((uint32_t)(P1[i] + Pm1[i])) >> 1; | |||
V0 = V0 - C0[i] - C6[i]; | |||
V1 = ((uint32_t)(P2[i] + Pm2[i] - 2 * C0[i] - 128 * C6[i])) >> 3; | |||
C4[i] = 43691 * (uint32_t)(V1 - V0); | |||
C2[i] = V0 - C4[i]; | |||
P1[i] = ((uint32_t)(P1[i] - Pm1[i])) >> 1; | |||
} | |||
/* reuse Pm1 for P3 */ | |||
#define P3 Pm1 | |||
k2x2_interpolate(P3, a + 5 * 9 * 2 * K); | |||
for (i = 0; i < 2 * M; i++) { | |||
V0 = P1[i]; | |||
V1 = 43691 * (((uint32_t)(P2[i] - Pm2[i]) >> 2) - V0); | |||
V2 = 43691 * (uint32_t)(P3[i] - C0[i] - 9 * (C2[i] + 9 * (C4[i] + 9 * C6[i]))); | |||
V2 = ((uint32_t)(V2 - V0)) >> 3; | |||
V2 -= V1; | |||
P3[i] = 52429 * (uint32_t)V2; | |||
P2[i] = V1 - V2; | |||
P1[i] = V0 - P2[i] - P3[i]; | |||
} | |||
for (i = 0; i < 2 * M; i++) { | |||
r[1 * M + i] += P1[i]; | |||
r[3 * M + i] += P2[i]; | |||
r[5 * M + i] += P3[i]; | |||
} | |||
} | |||
static inline void k2x2_interpolate(uint16_t r[M], const uint16_t a[9 * K]) { | |||
size_t i; | |||
uint16_t tmp[4 * K]; | |||
for (i = 0; i < 2 * K; i++) { | |||
r[0 * K + i] = a[0 * K + i]; | |||
r[2 * K + i] = a[2 * K + i]; | |||
} | |||
for (i = 0; i < 2 * K; i++) { | |||
r[1 * K + i] += a[8 * K + i] - a[0 * K + i] - a[2 * K + i]; | |||
} | |||
for (i = 0; i < 2 * K; i++) { | |||
r[4 * K + i] = a[4 * K + i]; | |||
r[6 * K + i] = a[6 * K + i]; | |||
} | |||
for (i = 0; i < 2 * K; i++) { | |||
r[5 * K + i] += a[14 * K + i] - a[4 * K + i] - a[6 * K + i]; | |||
} | |||
for (i = 0; i < 2 * K; i++) { | |||
tmp[0 * K + i] = a[12 * K + i]; | |||
tmp[2 * K + i] = a[10 * K + i]; | |||
} | |||
for (i = 0; i < 2 * K; i++) { | |||
tmp[K + i] += a[16 * K + i] - a[12 * K + i] - a[10 * K + i]; | |||
} | |||
for (i = 0; i < 4 * K; i++) { | |||
tmp[0 * K + i] = tmp[0 * K + i] - r[0 * K + i] - r[4 * K + i]; | |||
} | |||
for (i = 0; i < 4 * K; i++) { | |||
r[2 * K + i] += tmp[0 * K + i]; | |||
} | |||
} | |||
@@ -1,78 +0,0 @@ | |||
/* Based on supercop-20200702/crypto_core/invhrss701/simpler/core.c */ | |||
#include "poly.h" | |||
static inline uint8_t mod3(uint8_t a) { /* a between 0 and 9 */ | |||
int16_t t, c; | |||
a = (a >> 2) + (a & 3); /* between 0 and 4 */ | |||
t = a - 3; | |||
c = t >> 5; | |||
return (uint8_t) (t ^ (c & (a ^ t))); | |||
} | |||
/* return -1 if x<0 and y<0; otherwise return 0 */ | |||
static inline int16_t both_negative_mask(int16_t x, int16_t y) { | |||
return (x & y) >> 15; | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a) { | |||
poly f, g, v, w; | |||
size_t i, loop; | |||
int16_t delta, sign, swap, t; | |||
for (i = 0; i < NTRU_N; ++i) { | |||
v.coeffs[i] = 0; | |||
} | |||
for (i = 0; i < NTRU_N; ++i) { | |||
w.coeffs[i] = 0; | |||
} | |||
w.coeffs[0] = 1; | |||
for (i = 0; i < NTRU_N; ++i) { | |||
f.coeffs[i] = 1; | |||
} | |||
for (i = 0; i < NTRU_N - 1; ++i) { | |||
g.coeffs[NTRU_N - 2 - i] = mod3((a->coeffs[i] & 3) + 2 * (a->coeffs[NTRU_N - 1] & 3)); | |||
} | |||
g.coeffs[NTRU_N - 1] = 0; | |||
delta = 1; | |||
for (loop = 0; loop < 2 * (NTRU_N - 1) - 1; ++loop) { | |||
for (i = NTRU_N - 1; i > 0; --i) { | |||
v.coeffs[i] = v.coeffs[i - 1]; | |||
} | |||
v.coeffs[0] = 0; | |||
sign = mod3((uint8_t) (2 * g.coeffs[0] * f.coeffs[0])); | |||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]); | |||
delta ^= swap & (delta ^ -delta); | |||
delta += 1; | |||
for (i = 0; i < NTRU_N; ++i) { | |||
t = swap & (f.coeffs[i] ^ g.coeffs[i]); | |||
f.coeffs[i] ^= t; | |||
g.coeffs[i] ^= t; | |||
t = swap & (v.coeffs[i] ^ w.coeffs[i]); | |||
v.coeffs[i] ^= t; | |||
w.coeffs[i] ^= t; | |||
} | |||
for (i = 0; i < NTRU_N; ++i) { | |||
g.coeffs[i] = mod3((uint8_t) (g.coeffs[i] + sign * f.coeffs[i])); | |||
} | |||
for (i = 0; i < NTRU_N; ++i) { | |||
w.coeffs[i] = mod3((uint8_t) (w.coeffs[i] + sign * v.coeffs[i])); | |||
} | |||
for (i = 0; i < NTRU_N - 1; ++i) { | |||
g.coeffs[i] = g.coeffs[i + 1]; | |||
} | |||
g.coeffs[NTRU_N - 1] = 0; | |||
} | |||
sign = f.coeffs[0]; | |||
for (i = 0; i < NTRU_N - 1; ++i) { | |||
r->coeffs[i] = mod3((uint8_t) (sign * v.coeffs[NTRU_N - 2 - i])); | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} |
@@ -1,45 +0,0 @@ | |||
#include "sample.h" | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) { | |||
PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(f, uniformbytes); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(g, uniformbytes + NTRU_SAMPLE_IID_BYTES); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]) { | |||
PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(r, uniformbytes); | |||
PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(m, uniformbytes + NTRU_SAMPLE_IID_BYTES); | |||
} | |||
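/* sample_fixed_type draws a fixed-weight ternary polynomial: each of the    */ | |||
/* first NTRU_N-1 words gets 30 random bits in its high bits, while the low  */ | |||
/* two bits encode the coefficient (1 for the first NTRU_WEIGHT/2 words, 2   */ | |||
/* for the next NTRU_WEIGHT/2, 0 otherwise); a constant-time sort of the     */ | |||
/* full words then permutes the coefficients by the random high bits.        */ | |||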
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(poly *r, const unsigned char u[NTRU_SAMPLE_FT_BYTES]) { | |||
// Assumes NTRU_SAMPLE_FT_BYTES = ceil(30*(n-1)/8) | |||
int32_t s[NTRU_N - 1]; | |||
int i; | |||
// Use 30 bits of u per word | |||
for (i = 0; i < (NTRU_N - 1) / 4; i++) { | |||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26); | |||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28); | |||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30); | |||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24); | |||
} | |||
for (i = 0; i < NTRU_WEIGHT / 2; i++) { | |||
s[i] |= 1; | |||
} | |||
for (i = NTRU_WEIGHT / 2; i < NTRU_WEIGHT; i++) { | |||
s[i] |= 2; | |||
} | |||
PQCLEAN_NTRUHPS2048509_CLEAN_crypto_sort_int32(s, NTRU_N - 1); | |||
for (i = 0; i < NTRU_N - 1; i++) { | |||
r->coeffs[i] = ((uint16_t) (s[i] & 3)); | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} |
@@ -1,17 +0,0 @@ | |||
#ifndef SAMPLE_H | |||
#define SAMPLE_H | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "crypto_sort_int32.h" | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]); | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_FT_BYTES]); | |||
#endif |
@@ -1,26 +0,0 @@ | |||
#include "sample.h" | |||
static uint16_t mod3(uint16_t a) { | |||
uint16_t r; | |||
int16_t t, c; | |||
r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255 | |||
r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15 | |||
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 | |||
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3 | |||
t = r - 3; | |||
c = t >> 15; | |||
return (c & r) ^ (~c & t); | |||
} | |||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) { | |||
int i; | |||
/* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */ | |||
for (i = 0; i < NTRU_N - 1; i++) { | |||
r->coeffs[i] = mod3(uniformbytes[i]); | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} |
@@ -1,34 +0,0 @@ | |||
set( | |||
SRC_AVX2_NTRUHPS2048677 | |||
cmov.c | |||
crypto_sort_int32.c | |||
kem.c | |||
owcpa.c | |||
pack3.c | |||
packq.c | |||
poly.c | |||
poly_lift.c | |||
poly_mod_3_Phi_n.s | |||
poly_mod_q_Phi_n.s | |||
poly_r2_inv.c | |||
poly_r2_mul.s | |||
poly_rq_mul.s | |||
poly_rq_to_s3.s | |||
poly_s3_inv.c | |||
sample.c | |||
sample_iid.c | |||
square_10_677_shufbytes.s | |||
square_1_677_patience.s | |||
square_168_677_shufbytes.s | |||
square_21_677_shufbytes.s | |||
square_2_677_patience.s | |||
square_336_677_shufbytes.s | |||
square_3_677_patience.s | |||
square_42_677_shufbytes.s | |||
square_5_677_patience.s | |||
square_84_677_shufbytes.s | |||
vec32_sample_iid.s | |||
) | |||
define_kem_alg(ntruhps2048677_avx2 | |||
PQCLEAN_NTRUHPS2048677_AVX2 "${SRC_AVX2_NTRUHPS2048677}" "${CMAKE_CURRENT_SOURCE_DIR}") |
@@ -1,19 +0,0 @@ | |||
#ifndef PQCLEAN_NTRUHPS2048677_AVX2_API_H | |||
#define PQCLEAN_NTRUHPS2048677_AVX2_API_H | |||
#include <stdint.h> | |||
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_SECRETKEYBYTES 1234 | |||
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_PUBLICKEYBYTES 930 | |||
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_CIPHERTEXTBYTES 930 | |||
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_BYTES 32 | |||
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_ALGNAME "ntruhps2048677" | |||
int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk); | |||
int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#include "cmov.h" | |||
/* Constant-time conditional move: b = 1 copies x into r, b = 0 leaves r unchanged; (~b + 1) turns b into an all-ones or all-zero byte mask. */ | |||
void PQCLEAN_NTRUHPS2048677_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) { | |||
size_t i; | |||
b = (~b + 1); | |||
for (i = 0; i < len; i++) { | |||
r[i] ^= b & (x[i] ^ r[i]); | |||
} | |||
} |
@@ -1,10 +0,0 @@ | |||
#ifndef VERIFY_H | |||
#define VERIFY_H | |||
#include "params.h" | |||
#include <stddef.h> | |||
void PQCLEAN_NTRUHPS2048677_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#ifndef CRYPTO_SORT | |||
#define CRYPTO_SORT | |||
#include "params.h" | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
void PQCLEAN_NTRUHPS2048677_AVX2_crypto_sort_int32(int32_t *x, size_t n); | |||
#endif |
@@ -1,63 +0,0 @@ | |||
#include "api.h" | |||
#include "cmov.h" | |||
#include "fips202.h" | |||
#include "owcpa.h" | |||
#include "params.h" | |||
#include "randombytes.h" | |||
#include "sample.h" | |||
// API FUNCTIONS | |||
int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { | |||
uint8_t seed[NTRU_SAMPLE_FG_BYTES]; | |||
randombytes(seed, NTRU_SAMPLE_FG_BYTES); | |||
PQCLEAN_NTRUHPS2048677_AVX2_owcpa_keypair(pk, sk, seed); | |||
randombytes(sk + NTRU_OWCPA_SECRETKEYBYTES, NTRU_PRFKEYBYTES); | |||
return 0; | |||
} | |||
int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) { | |||
poly r, m; | |||
uint8_t rm[NTRU_OWCPA_MSGBYTES]; | |||
uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES]; | |||
randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES); | |||
PQCLEAN_NTRUHPS2048677_AVX2_sample_rm(&r, &m, rm_seed); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm, &r); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m); | |||
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(&r); | |||
PQCLEAN_NTRUHPS2048677_AVX2_owcpa_enc(c, &r, &m, pk); | |||
return 0; | |||
} | |||
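/* Decapsulation with implicit rejection: a shared key is always derived,    */ | |||
/* and on any owcpa_dec/consistency failure cmov overwrites it with          */ | |||
/* sha3_256(secret PRF key || ciphertext), so an invalid ciphertext yields   */ | |||
/* a pseudorandom key rather than an error.                                  */ | |||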
int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk) { | |||
int i, fail; | |||
uint8_t rm[NTRU_OWCPA_MSGBYTES]; | |||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES]; | |||
fail = PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(rm, c, sk); | |||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */ | |||
/* See comment in PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec for details. */ | |||
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES); | |||
    /* Rejection key: sha3_256(secret PRF key || input ciphertext) */ | |||
for (i = 0; i < NTRU_PRFKEYBYTES; i++) { | |||
buf[i] = sk[i + NTRU_OWCPA_SECRETKEYBYTES]; | |||
} | |||
for (i = 0; i < NTRU_CIPHERTEXTBYTES; i++) { | |||
buf[NTRU_PRFKEYBYTES + i] = c[i]; | |||
} | |||
sha3_256(rm, buf, NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES); | |||
PQCLEAN_NTRUHPS2048677_AVX2_cmov(k, rm, NTRU_SHAREDKEYBYTES, (unsigned char) fail); | |||
return 0; | |||
} |
@@ -1,183 +0,0 @@ | |||
#include "owcpa.h" | |||
#include "poly.h" | |||
#include "sample.h" | |||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) { | |||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */ | |||
/* Check that any unused bits of the final byte are zero. */ | |||
uint16_t t = 0; | |||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1]; | |||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))); | |||
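    /* For ntruhps2048677: NTRU_LOGQ*NTRU_PACK_DEG = 11*676 = 7436 bits packed into 930 bytes, so the 4 unused high bits of the final byte must be zero. */ | |||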
/* We have 0 <= t < 256 */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 15)); | |||
} | |||
static int owcpa_check_r(const poly *r) { | |||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */ | |||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */ | |||
int i; | |||
uint32_t t = 0; | |||
uint16_t c; | |||
for (i = 0; i < NTRU_N - 1; i++) { | |||
c = r->coeffs[i]; | |||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */ | |||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */ | |||
} | |||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */ | |||
/* We have 0 <= t < 2^16. */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 31)); | |||
} | |||
static int owcpa_check_m(const poly *m) { | |||
/* Check that m is in message space, i.e. */ | |||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */ | |||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */ | |||
/* Note: We may assume that m has coefficients in {0,1,2}. */ | |||
int i; | |||
uint32_t t = 0; | |||
uint16_t ps = 0; | |||
uint16_t ms = 0; | |||
for (i = 0; i < NTRU_N; i++) { | |||
ps += m->coeffs[i] & 1; | |||
ms += m->coeffs[i] & 2; | |||
} | |||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */ | |||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */ | |||
/* We have 0 <= t < 2^16. */ | |||
/* Return 0 on success (t=0), 1 on failure */ | |||
return (int) (1 & ((~t + 1) >> 31)); | |||
} | |||
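/* Key generation below computes the public key h = 3*g/f and the secret     */ | |||
/* h^{-1} = f/(3*g) from a single ring inversion of (3*g)*f:                 */ | |||
/* invgf*f*f = f/(3*g) and invgf*(3*g)*(3*g) = 3*g/f.                        */ | |||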
void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_keypair(unsigned char *pk, | |||
unsigned char *sk, | |||
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) { | |||
int i; | |||
poly x1, x2, x3, x4, x5; | |||
poly *f = &x1, *g = &x2, *invf_mod3 = &x3; | |||
poly *gf = &x3, *invgf = &x4, *tmp = &x5; | |||
poly *invh = &x3, *h = &x3; | |||
PQCLEAN_NTRUHPS2048677_AVX2_sample_fg(f, g, seed); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_inv(invf_mod3, f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(sk, f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(sk + NTRU_PACK_TRINARY_BYTES, invf_mod3); | |||
/* Lift coeffs of f and g from Z_p to Z_q */ | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(g); | |||
/* g = 3*g */ | |||
for (i = 0; i < NTRU_N; i++) { | |||
g->coeffs[i] = 3 * g->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(gf, g, f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_inv(invgf, gf); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(tmp, invgf, f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(invh, tmp, f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(tmp, invgf, g); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(h, tmp, g); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(pk, h); | |||
} | |||
void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_enc(unsigned char *c, | |||
const poly *r, | |||
const poly *m, | |||
const unsigned char *pk) { | |||
int i; | |||
poly x1, x2; | |||
poly *h = &x1, *liftm = &x1; | |||
poly *ct = &x2; | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(h, pk); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(ct, r, h); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(liftm, m); | |||
for (i = 0; i < NTRU_N; i++) { | |||
ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(c, ct); | |||
} | |||
int PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(unsigned char *rm, | |||
const unsigned char *ciphertext, | |||
const unsigned char *secretkey) { | |||
int i; | |||
int fail; | |||
poly x1, x2, x3, x4; | |||
poly *c = &x1, *f = &x2, *cf = &x3; | |||
poly *mf = &x2, *finv3 = &x3, *m = &x4; | |||
poly *liftm = &x2, *invh = &x3, *r = &x4; | |||
poly *b = &x1; | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(c, ciphertext); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(f, secretkey); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(cf, c, f); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_to_S3(mf, cf); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(finv3, secretkey + NTRU_PACK_TRINARY_BYTES); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_mul(m, mf, finv3); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m); | |||
fail = 0; | |||
/* Check that the unused bits of the last byte of the ciphertext are zero */ | |||
fail |= owcpa_check_ciphertext(ciphertext); | |||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */ | |||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */ | |||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */ | |||
/* (m can take any value in S3 in NTRU_HRSS) */ | |||
fail |= owcpa_check_m(m); | |||
/* b = c - Lift(m) mod (q, x^n - 1) */ | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(liftm, m); | |||
for (i = 0; i < NTRU_N; i++) { | |||
b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i]; | |||
} | |||
/* r = b / h mod (q, Phi_n) */ | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(invh, secretkey + 2 * NTRU_PACK_TRINARY_BYTES); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(r, b, invh); | |||
/* NOTE: Our definition of r as b/h mod (q, Phi_n) follows Figure 4 of */ | |||
/* [Sch18] https://eprint.iacr.org/2018/1174/20181203:032458. */ | |||
/* This differs from Figure 10 of Saito--Xagawa--Yamakawa */ | |||
/* [SXY17] https://eprint.iacr.org/2017/1005/20180516:055500 */ | |||
/* where r gets a final reduction modulo p. */ | |||
/* We need this change to use Proposition 1 of [Sch18]. */ | |||
/* Proposition 1 of [Sch18] shows that re-encryption with (r,m) yields c. */ | |||
/* if and only if fail==0 after the following call to owcpa_check_r */ | |||
/* The procedure given in Fig. 8 of [Sch18] can be skipped because we have */ | |||
/* c(1) = 0 due to the use of poly_Rq_sum_zero_{to,from}bytes. */ | |||
fail |= owcpa_check_r(r); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_trinary_Zq_to_Z3(r); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm, r); | |||
return fail; | |||
} |
@@ -1,19 +0,0 @@ | |||
#ifndef OWCPA_H | |||
#define OWCPA_H | |||
#include "params.h" | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_keypair(unsigned char *pk, | |||
unsigned char *sk, | |||
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_enc(unsigned char *c, | |||
const poly *r, | |||
const poly *m, | |||
const unsigned char *pk); | |||
int PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(unsigned char *rm, | |||
const unsigned char *ciphertext, | |||
const unsigned char *secretkey); | |||
#endif |
@@ -1,46 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_MSGBYTES], const poly *a) { | |||
int i; | |||
unsigned char c; | |||
int j; | |||
for (i = 0; i < NTRU_PACK_DEG / 5; i++) { | |||
c = a->coeffs[5 * i + 4] & 255; | |||
c = (3 * c + a->coeffs[5 * i + 3]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 2]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 1]) & 255; | |||
c = (3 * c + a->coeffs[5 * i + 0]) & 255; | |||
msg[i] = c; | |||
} | |||
i = NTRU_PACK_DEG / 5; | |||
c = 0; | |||
for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) { | |||
c = (3 * c + a->coeffs[5 * i + j]) & 255; | |||
} | |||
msg[i] = c; | |||
} | |||
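/* poly_S3_frombytes below recovers five trits per byte with multiply-shift  */ | |||
/* "division": 171/2^9, 57/2^9, 19/2^9 and 203/2^14 approximate 1/3, 1/9,    */ | |||
/* 1/27 and 1/81 with enough precision to be exact for byte-sized inputs.    */ | |||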
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_OWCPA_MSGBYTES]) { | |||
int i; | |||
unsigned char c; | |||
int j; | |||
for (i = 0; i < NTRU_PACK_DEG / 5; i++) { | |||
c = msg[i]; | |||
r->coeffs[5 * i + 0] = c; | |||
r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3 | |||
r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2 | |||
r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3 | |||
r->coeffs[5 * i + 4] = c * 203 >> 14; // etc. | |||
} | |||
i = NTRU_PACK_DEG / 5; | |||
c = msg[i]; | |||
for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) { | |||
r->coeffs[5 * i + j] = c; | |||
c = c * 171 >> 9; | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_3_Phi_n(r); | |||
} | |||
@@ -1,93 +0,0 @@ | |||
#include "poly.h" | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a) { | |||
int i, j; | |||
uint16_t t[8]; | |||
for (i = 0; i < NTRU_PACK_DEG / 8; i++) { | |||
for (j = 0; j < 8; j++) { | |||
t[j] = MODQ(a->coeffs[8 * i + j]); | |||
} | |||
r[11 * i + 0] = (unsigned char) ( t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3)); | |||
r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6)); | |||
r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff); | |||
r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1)); | |||
r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4)); | |||
r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7)); | |||
r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff); | |||
r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2)); | |||
r[11 * i + 9] = (unsigned char) ((t[6] >> 6) | ((t[7] & 0x07) << 5)); | |||
r[11 * i + 10] = (unsigned char) ((t[7] >> 3)); | |||
} | |||
for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) { | |||
t[j] = MODQ(a->coeffs[8 * i + j]); | |||
} | |||
for (; j < 8; j++) { | |||
t[j] = 0; | |||
} | |||
switch (NTRU_PACK_DEG & 0x07) { | |||
    // cases 0 and 6 are impossible: they would require NTRU_N = 1 or 7 mod 8, | |||
    // which would make 2 a quadratic residue mod NTRU_N, contradicting the    | |||
    // fact that 2 generates (Z/n)*.                                            | |||
case 4: | |||
r[11 * i + 0] = (unsigned char) (t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); | |||
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); | |||
r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff; | |||
r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1); | |||
r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4); | |||
break; | |||
case 2: | |||
r[11 * i + 0] = (unsigned char) (t[0] & 0xff); | |||
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3); | |||
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6); | |||
break; | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a) { | |||
int i; | |||
for (i = 0; i < NTRU_PACK_DEG / 8; i++) { | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); | |||
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); | |||
r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4); | |||
r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9); | |||
r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6); | |||
r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3); | |||
} | |||
switch (NTRU_PACK_DEG & 0x07) { | |||
    // cases 0 and 6 are impossible: they would require NTRU_N = 1 or 7 mod 8, | |||
    // which would make 2 a quadratic residue mod NTRU_N, contradicting the    | |||
    // fact that 2 generates (Z/n)*.                                            | |||
case 4: | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10); | |||
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7); | |||
break; | |||
case 2: | |||
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8); | |||
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5); | |||
break; | |||
} | |||
r->coeffs[NTRU_N - 1] = 0; | |||
} | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) { | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(r, a); | |||
} | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a) { | |||
int i; | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(r, a); | |||
/* Set r[n-1] so that the sum of coefficients is zero mod q */ | |||
r->coeffs[NTRU_N - 1] = 0; | |||
for (i = 0; i < NTRU_PACK_DEG; i++) { | |||
r->coeffs[NTRU_N - 1] -= r->coeffs[i]; | |||
} | |||
} |
@@ -1,37 +0,0 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#define NTRU_HPS | |||
#define NTRU_N 677 | |||
#define NTRU_LOGQ 11 | |||
/* Do not modify below this line */ | |||
#define PAD32(X) ((((X) + 31)/32)*32) | |||
#define NTRU_Q (1 << NTRU_LOGQ) | |||
#define NTRU_WEIGHT (NTRU_Q/8 - 2) | |||
#define NTRU_SEEDBYTES 32 | |||
#define NTRU_PRFKEYBYTES 32 | |||
#define NTRU_SHAREDKEYBYTES 32 | |||
#define NTRU_SAMPLE_IID_BYTES (NTRU_N-1) | |||
#define NTRU_SAMPLE_FT_BYTES ((30*(NTRU_N-1)+7)/8) | |||
#define NTRU_SAMPLE_FG_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES) | |||
#define NTRU_SAMPLE_RM_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES) | |||
#define NTRU_PACK_DEG (NTRU_N-1) | |||
#define NTRU_PACK_TRINARY_BYTES ((NTRU_PACK_DEG+4)/5) | |||
#define NTRU_OWCPA_MSGBYTES (2*NTRU_PACK_TRINARY_BYTES) | |||
#define NTRU_OWCPA_PUBLICKEYBYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8) | |||
#define NTRU_OWCPA_SECRETKEYBYTES (2*NTRU_PACK_TRINARY_BYTES + NTRU_OWCPA_PUBLICKEYBYTES) | |||
#define NTRU_OWCPA_BYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8) | |||
#define NTRU_PUBLICKEYBYTES (NTRU_OWCPA_PUBLICKEYBYTES) | |||
#define NTRU_SECRETKEYBYTES (NTRU_OWCPA_SECRETKEYBYTES + NTRU_PRFKEYBYTES) | |||
#define NTRU_CIPHERTEXTBYTES (NTRU_OWCPA_BYTES) | |||
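/* Derived values for ntruhps2048677: NTRU_Q = 2048, NTRU_WEIGHT = 254,      */ | |||
/* NTRU_PACK_TRINARY_BYTES = 136, NTRU_OWCPA_PUBLICKEYBYTES = 930,           */ | |||
/* NTRU_OWCPA_SECRETKEYBYTES = 1202, NTRU_SECRETKEYBYTES = 1234,             */ | |||
/* NTRU_CIPHERTEXTBYTES = 930, matching the sizes in api.h.                  */ | |||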
#endif |
@@ -1,75 +0,0 @@ | |||
#include "poly.h" | |||
/* Map {0, 1, 2} -> {0,1,q-1} in place */ | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = r->coeffs[i] | ((-(r->coeffs[i] >> 1)) & (NTRU_Q - 1)); | |||
} | |||
} | |||
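/* E.g. with q = 2048: 0 -> 0, 1 -> 1, 2 -> 2 | 2047 = 2047 = q-1. */ | |||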
/* Map {0, 1, q-1} -> {0,1,2} in place */ | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_trinary_Zq_to_Z3(poly *r) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = MODQ(r->coeffs[i]); | |||
r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1))); | |||
} | |||
} | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b) { | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, a, b); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_q_Phi_n(r); | |||
} | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b) { | |||
int i; | |||
/* Our S3 multiplications do not overflow mod q, */ | |||
/* so we can re-purpose PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul, as long as we */ | |||
/* follow with an explicit reduction mod q. */ | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, a, b); | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = MODQ(r->coeffs[i]); | |||
} | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_3_Phi_n(r); | |||
} | |||
static void PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) { | |||
int i; | |||
poly b, c; | |||
poly s; | |||
    // Newton (Hensel) iteration ai <- ai * (2 - a*ai) mod q, unrolled four times; | |||
    // starting from an inverse mod 2, each step doubles the number of correct     | |||
    // low-order bits (1 -> 2 -> 4 -> 8 -> 16 >= NTRU_LOGQ).                       | |||
for (i = 0; i < NTRU_N; i++) { | |||
b.coeffs[i] = -(a->coeffs[i]); | |||
} | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = ai->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, r, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*ai | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&s, &c, r); // s = ai*c | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, &s, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*s | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, r, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*r | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&s, &c, r); // s = r*c | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, &s, &b); | |||
c.coeffs[0] += 2; // c = 2 - a*s | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c | |||
} | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_inv(poly *r, const poly *a) { | |||
poly ai2; | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv(&ai2, a); | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv_to_Rq_inv(r, &ai2, a); | |||
} |
@@ -1,41 +0,0 @@ | |||
#ifndef POLY_H | |||
#define POLY_H | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "params.h" | |||
#define MODQ(X) ((X) & (NTRU_Q-1)) | |||
typedef union { /* align to 32 byte boundary for vmovdqa */ | |||
uint16_t coeffs[PAD32(NTRU_N)]; | |||
__m256i coeffs_x16[PAD32(NTRU_N) / 16]; | |||
} poly; | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_3_Phi_n(poly *r); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_q_Phi_n(poly *r); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_PACK_TRINARY_BYTES], const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_PACK_TRINARY_BYTES]); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_to_S3(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_inv(poly *r, const poly *a); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(poly *r); | |||
void PQCLEAN_NTRUHPS2048677_AVX2_poly_trinary_Zq_to_Z3(poly *r); | |||
#endif |
@@ -1,11 +0,0 @@ | |||
#include "poly.h" | |||
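/* For NTRU-HPS, Lift(m) is simply m with coefficients mapped {0,1,2} -> {0,1,q-1} (poly_Z3_to_Zq). */ | |||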
void PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(poly *r, const poly *a) { | |||
int i; | |||
for (i = 0; i < NTRU_N; i++) { | |||
r->coeffs[i] = a->coeffs[i]; | |||
} | |||
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(r); | |||
} | |||