From a4906713be256046bd1454df4a7bf0da673222b3 Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Mon, 20 May 2019 15:12:51 +0200 Subject: [PATCH] use optimized matrix_shake.c for frodokem640shake --- crypto_kem/frodokem640shake/META.yml | 2 + crypto_kem/frodokem640shake/opt/LICENSE | 21 ++ crypto_kem/frodokem640shake/opt/Makefile | 19 ++ .../opt/Makefile.Microsoft_nmake | 19 ++ crypto_kem/frodokem640shake/opt/api.h | 20 ++ crypto_kem/frodokem640shake/opt/common.h | 19 ++ crypto_kem/frodokem640shake/opt/kem.c | 238 ++++++++++++++++++ .../frodokem640shake/opt/matrix_shake.c | 206 +++++++++++++++ crypto_kem/frodokem640shake/opt/noise.c | 35 +++ crypto_kem/frodokem640shake/opt/params.h | 27 ++ crypto_kem/frodokem640shake/opt/util.c | 235 +++++++++++++++++ 11 files changed, 841 insertions(+) create mode 100644 crypto_kem/frodokem640shake/opt/LICENSE create mode 100644 crypto_kem/frodokem640shake/opt/Makefile create mode 100644 crypto_kem/frodokem640shake/opt/Makefile.Microsoft_nmake create mode 100644 crypto_kem/frodokem640shake/opt/api.h create mode 100644 crypto_kem/frodokem640shake/opt/common.h create mode 100644 crypto_kem/frodokem640shake/opt/kem.c create mode 100644 crypto_kem/frodokem640shake/opt/matrix_shake.c create mode 100644 crypto_kem/frodokem640shake/opt/noise.c create mode 100644 crypto_kem/frodokem640shake/opt/params.h create mode 100644 crypto_kem/frodokem640shake/opt/util.c diff --git a/crypto_kem/frodokem640shake/META.yml b/crypto_kem/frodokem640shake/META.yml index 99e685c5..2a37b7f9 100644 --- a/crypto_kem/frodokem640shake/META.yml +++ b/crypto_kem/frodokem640shake/META.yml @@ -23,3 +23,5 @@ auxiliary-submitters: implementations: - name: clean version: https://github.com/Microsoft/PQCrypto-LWEKE/commit/d5bbd0417ba111b08a959c0042a1dcc65fb14a89 +- name: opt + version: https://github.com/Microsoft/PQCrypto-LWEKE/commit/d5bbd0417ba111b08a959c0042a1dcc65fb14a89 diff --git a/crypto_kem/frodokem640shake/opt/LICENSE 
b/crypto_kem/frodokem640shake/opt/LICENSE new file mode 100644 index 00000000..5cf7c8db --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Microsoft Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE diff --git a/crypto_kem/frodokem640shake/opt/Makefile b/crypto_kem/frodokem640shake/opt/Makefile new file mode 100644 index 00000000..6059e5df --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/Makefile @@ -0,0 +1,19 @@ +# This Makefile can be used with GNU Make or BSD Make + +LIB=libfrodokem640shake_opt.a +HEADERS=api.h params.h common.h +OBJECTS=kem.o matrix_shake.o noise.o util.o + +CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror -Wmissing-prototypes -std=c99 -I../../../common $(EXTRAFLAGS) + +all: $(LIB) + +%.o: %.c $(HEADERS) + $(CC) $(CFLAGS) -c -o $@ $< + +$(LIB): $(OBJECTS) + $(AR) -r $@ $(OBJECTS) + +clean: + $(RM) $(OBJECTS) + $(RM) $(LIB) diff --git a/crypto_kem/frodokem640shake/opt/Makefile.Microsoft_nmake b/crypto_kem/frodokem640shake/opt/Makefile.Microsoft_nmake new file mode 100644 index 00000000..3a590958 --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/Makefile.Microsoft_nmake @@ -0,0 +1,19 @@ +# This Makefile can be used with Microsoft Visual Studio's nmake using the command: +# nmake /f Makefile.Microsoft_nmake + +LIBRARY=libfrodokem640shake_opt.lib +OBJECTS=kem.obj matrix_shake.obj noise.obj util.obj + +CFLAGS=/nologo /I ..\..\..\common /W4 /WX + +all: $(LIBRARY) + +# Make sure objects are recompiled if headers change. 
+$(OBJECTS): *.h + +$(LIBRARY): $(OBJECTS) + LIB.EXE /NOLOGO /WX /OUT:$@ $** + +clean: + -DEL $(OBJECTS) + -DEL $(LIBRARY) diff --git a/crypto_kem/frodokem640shake/opt/api.h b/crypto_kem/frodokem640shake/opt/api.h new file mode 100644 index 00000000..c63d9f9d --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/api.h @@ -0,0 +1,20 @@ +#ifndef PQCLEAN_FRODOKEM640SHAKE_OPT_API_H +#define PQCLEAN_FRODOKEM640SHAKE_OPT_API_H + +#include +#include + +#define PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_SECRETKEYBYTES 19888 // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH +#define PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_PUBLICKEYBYTES 9616 // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 +#define PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_BYTES 16 +#define PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_CIPHERTEXTBYTES 9720 // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8 + +#define PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_ALGNAME "FrodoKEM-640-SHAKE" + +int PQCLEAN_FRODOKEM640SHAKE_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk); + +int PQCLEAN_FRODOKEM640SHAKE_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); + +int PQCLEAN_FRODOKEM640SHAKE_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); + +#endif diff --git a/crypto_kem/frodokem640shake/opt/common.h b/crypto_kem/frodokem640shake/opt/common.h new file mode 100644 index 00000000..2910c84f --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/common.h @@ -0,0 +1,19 @@ +#ifndef COMMON_H +#define COMMON_H + +int PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); +int PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); +void PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(uint16_t *s, size_t n); +void PQCLEAN_FRODOKEM640SHAKE_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s); +void 
PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e); +void PQCLEAN_FRODOKEM640SHAKE_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b); +void PQCLEAN_FRODOKEM640SHAKE_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b); +void PQCLEAN_FRODOKEM640SHAKE_OPT_key_encode(uint16_t *out, const uint16_t *in); +void PQCLEAN_FRODOKEM640SHAKE_OPT_key_decode(uint16_t *out, const uint16_t *in); +void PQCLEAN_FRODOKEM640SHAKE_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb); +void PQCLEAN_FRODOKEM640SHAKE_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb); +void PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(uint8_t *mem, size_t n); +uint16_t PQCLEAN_FRODOKEM640SHAKE_OPT_LE_TO_UINT16(uint16_t n); +uint16_t PQCLEAN_FRODOKEM640SHAKE_OPT_UINT16_TO_LE(uint16_t n); + +#endif diff --git a/crypto_kem/frodokem640shake/opt/kem.c b/crypto_kem/frodokem640shake/opt/kem.c new file mode 100644 index 00000000..50bb88c8 --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/kem.c @@ -0,0 +1,238 @@ +/******************************************************************************************** +* FrodoKEM: Learning with Errors Key Encapsulation +* +* Abstract: Key Encapsulation Mechanism (KEM) based on Frodo +*********************************************************************************************/ + +#include +#include + +#include "fips202.h" +#include "randombytes.h" + +#include "api.h" +#include "common.h" +#include "params.h" + +int PQCLEAN_FRODOKEM640SHAKE_OPT_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { + // FrodoKEM's key generation + // Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes) + // secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes) + uint8_t *pk_seedA = &pk[0]; + uint8_t *pk_b = &pk[BYTES_SEED_A]; + uint8_t *sk_s = 
&sk[0]; + uint8_t *sk_pk = &sk[CRYPTO_BYTES]; + uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; + uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; + uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; + uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0}; // contains secret data + uint16_t *E = &S[PARAMS_N * PARAMS_NBAR]; // contains secret data + uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A]; // contains secret data via randomness_s and randomness_seedSE + uint8_t *randomness_s = &randomness[0]; // contains secret data + uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data + uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES]; + uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data + + // Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key + randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A); + shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A); + + // Generate S and E, and compute B = A*S + E. 
Generate A on-the-fly + shake_input_seedSE[0] = 0x5F; + memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES); + shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); + for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) { + S[i] = PQCLEAN_FRODOKEM640SHAKE_OPT_LE_TO_UINT16(S[i]); + } + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(S, PARAMS_N * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(E, PARAMS_N * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_as_plus_e(B, S, E, pk); + + // Encode the second part of the public key + PQCLEAN_FRODOKEM640SHAKE_OPT_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); + + // Add s, pk and S to the secret key + memcpy(sk_s, randomness_s, CRYPTO_BYTES); + memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES); + for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { + S[i] = PQCLEAN_FRODOKEM640SHAKE_OPT_UINT16_TO_LE(S[i]); + } + memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR); + + // Add H(pk) to the secret key + shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); + + // Cleanup: + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(randomness, 2 * CRYPTO_BYTES); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); + return 0; +} + + +int PQCLEAN_FRODOKEM640SHAKE_OPT_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { + // FrodoKEM's key encapsulation + const uint8_t *pk_seedA = &pk[0]; + const uint8_t *pk_b = &pk[BYTES_SEED_A]; + uint8_t *ct_c1 = &ct[0]; + uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; + uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; + uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data + uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; + uint16_t Bp[PARAMS_N * 
PARAMS_NBAR] = {0}; + uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data + uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data + uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data + uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via mu + uint8_t *pkh = &G2in[0]; + uint8_t *mu = &G2in[BYTES_PKHASH]; // contains secret data + uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data + uint8_t *seedSE = &G2out[0]; // contains secret data + uint8_t *k = &G2out[CRYPTO_BYTES]; // contains secret data + uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k + uint8_t *Fin_ct = &Fin[0]; + uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data + uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data + + // pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu) + shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES); + randombytes(mu, BYTES_MU); + shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); + + // Generate Sp and Ep, and compute Bp = Sp*A + Ep. 
Generate A on-the-fly + shake_input_seedSE[0] = 0x96; + memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES); + shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES); + for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { + Sp[i] = PQCLEAN_FRODOKEM640SHAKE_OPT_LE_TO_UINT16(Sp[i]); + } + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA); + PQCLEAN_FRODOKEM640SHAKE_OPT_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ); + + // Generate Epp, and compute V = Sp*B + Epp + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); + PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sb_plus_e(V, B, Sp, Epp); + + // Encode mu, and compute C = V + enc(mu) (mod q) + PQCLEAN_FRODOKEM640SHAKE_OPT_key_encode(C, (uint16_t *)mu); + PQCLEAN_FRODOKEM640SHAKE_OPT_add(C, V, C); + PQCLEAN_FRODOKEM640SHAKE_OPT_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ); + + // Compute ss = F(ct||KK) + memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); + memcpy(Fin_k, k, CRYPTO_BYTES); + shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); + + // Cleanup: + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(mu, BYTES_MU); + 
PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(Fin_k, CRYPTO_BYTES); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES); + return 0; +} + + +int PQCLEAN_FRODOKEM640SHAKE_OPT_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { + // FrodoKEM's key decapsulation + uint16_t B[PARAMS_N * PARAMS_NBAR] = {0}; + uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0}; + uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data + uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0}; + uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0}; + uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0}; + uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data + uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data + uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data + const uint8_t *ct_c1 = &ct[0]; + const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8]; + const uint8_t *sk_s = &sk[0]; + const uint8_t *sk_pk = &sk[CRYPTO_BYTES]; + const uint16_t *sk_S = (uint16_t *) &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES]; + uint16_t S[PARAMS_N * PARAMS_NBAR]; // contains secret data + const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR]; + const uint8_t *pk_seedA = &sk_pk[0]; + const uint8_t *pk_b = &sk_pk[BYTES_SEED_A]; + uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via muprime + uint8_t *pkh = &G2in[0]; + uint8_t *muprime = &G2in[BYTES_PKHASH]; // contains secret data + uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data + uint8_t *seedSEprime = &G2out[0]; // contains secret data + uint8_t *kprime = &G2out[CRYPTO_BYTES]; // contains secret data + uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k + uint8_t *Fin_ct = &Fin[0]; + uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data + uint8_t shake_input_seedSEprime[1 + 
CRYPTO_BYTES]; // contains secret data + + for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { + S[i] = PQCLEAN_FRODOKEM640SHAKE_OPT_LE_TO_UINT16(sk_S[i]); + } + + // Compute W = C - Bp*S (mod q), and decode the randomness mu + PQCLEAN_FRODOKEM640SHAKE_OPT_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ); + PQCLEAN_FRODOKEM640SHAKE_OPT_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ); + PQCLEAN_FRODOKEM640SHAKE_OPT_mul_bs(W, Bp, S); + PQCLEAN_FRODOKEM640SHAKE_OPT_sub(W, C, W); + PQCLEAN_FRODOKEM640SHAKE_OPT_key_decode((uint16_t *)muprime, W); + + // Generate (seedSE' || k') = G_2(pkh || mu') + memcpy(pkh, sk_pkh, BYTES_PKHASH); + shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU); + + // Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly + shake_input_seedSEprime[0] = 0x96; + memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES); + shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES); + for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) { + Sp[i] = PQCLEAN_FRODOKEM640SHAKE_OPT_LE_TO_UINT16(Sp[i]); + } + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(Sp, PARAMS_N * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(Ep, PARAMS_N * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA); + + // Generate Epp, and compute W = Sp*B + Epp + PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR); + PQCLEAN_FRODOKEM640SHAKE_OPT_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ); + PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sb_plus_e(W, B, Sp, Epp); + + // Encode mu, and compute CC = W + enc(mu') (mod q) + PQCLEAN_FRODOKEM640SHAKE_OPT_key_encode(CC, (uint16_t *)muprime); + PQCLEAN_FRODOKEM640SHAKE_OPT_add(CC, W, CC); + + // Prepare input to F + 
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES); + + // Reducing BBp modulo q + for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) { + BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1); + } + + // Is (Bp == BBp & C == CC) = true + if (memcmp(Bp, BBp, 2 * PARAMS_N * PARAMS_NBAR) == 0 && memcmp(C, CC, 2 * PARAMS_NBAR * PARAMS_NBAR) == 0) { + // Load k' to do ss = F(ct || k') + memcpy(Fin_k, kprime, CRYPTO_BYTES); + } else { + // Load s to do ss = F(ct || s) + memcpy(Fin_k, sk_s, CRYPTO_BYTES); + } + shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES); + + // Cleanup: + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t)); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(muprime, BYTES_MU); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(G2out, 2 * CRYPTO_BYTES); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(Fin_k, CRYPTO_BYTES); + PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES); + return 0; +} diff --git a/crypto_kem/frodokem640shake/opt/matrix_shake.c b/crypto_kem/frodokem640shake/opt/matrix_shake.c new file mode 100644 index 00000000..0eb59f6b --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/matrix_shake.c @@ -0,0 +1,206 @@ +/******************************************************************************************** +* FrodoKEM: Learning with Errors Key Encapsulation +* +* Abstract: matrix arithmetic functions used by the KEM +*********************************************************************************************/ + +#include +#include + +#include "fips202.h" + +#include "api.h" 
+#include "common.h" +#include "params.h" +#define USE_SHAKE128_FOR_A + +int PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { + // Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right. + // Inputs: s, e (N x N_BAR) + // Output: out = A*s + e (N x N_BAR) + int i, j, k; + int16_t a_row[4 * PARAMS_N] = {0}; + + for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { + *((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); + } + + #if defined(USE_AES128_FOR_A) + int16_t a_row_temp[4 * PARAMS_N] = {0}; // Take four lines of A at once + #if !defined(USE_OPENSSL) + uint8_t aes_key_schedule[16 * 11]; + AES128_load_schedule(seed_A, aes_key_schedule); + #else + EVP_CIPHER_CTX *aes_key_schedule; + int len; + if (!(aes_key_schedule = EVP_CIPHER_CTX_new())) { + handleErrors(); + } + if (1 != EVP_EncryptInit_ex(aes_key_schedule, EVP_aes_128_ecb(), NULL, seed_A, NULL)) { + handleErrors(); + } + #endif + + for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) { + a_row_temp[j + 1 + 0 * PARAMS_N] = j; // Loading values in the little-endian order + a_row_temp[j + 1 + 1 * PARAMS_N] = j; + a_row_temp[j + 1 + 2 * PARAMS_N] = j; + a_row_temp[j + 1 + 3 * PARAMS_N] = j; + } + + for (i = 0; i < PARAMS_N; i += 4) { + for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) { // Go through A, four rows at a time + a_row_temp[j + 0 * PARAMS_N] = i + 0; // Loading values in the little-endian order + a_row_temp[j + 1 * PARAMS_N] = i + 1; + a_row_temp[j + 2 * PARAMS_N] = i + 2; + a_row_temp[j + 3 * PARAMS_N] = i + 3; + } + + #if !defined(USE_OPENSSL) + AES128_ECB_enc_sch((uint8_t *)a_row_temp, 4 * PARAMS_N * sizeof(int16_t), aes_key_schedule, (uint8_t *)a_row); + #else + if (1 != EVP_EncryptUpdate(aes_key_schedule, (uint8_t *)a_row, &len, (uint8_t *)a_row_temp, 4 * PARAMS_N * sizeof(int16_t))) { + handleErrors(); + } + #endif + #elif defined (USE_SHAKE128_FOR_A) + uint8_t seed_A_separated[2 + BYTES_SEED_A]; + 
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; + memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); + for (i = 0; i < PARAMS_N; i += 4) { + seed_A_origin[0] = (uint16_t) (i + 0); + shake128((unsigned char *)(a_row + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + seed_A_origin[0] = (uint16_t) (i + 1); + shake128((unsigned char *)(a_row + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + seed_A_origin[0] = (uint16_t) (i + 2); + shake128((unsigned char *)(a_row + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + seed_A_origin[0] = (uint16_t) (i + 3); + shake128((unsigned char *)(a_row + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + #endif + + for (k = 0; k < PARAMS_NBAR; k++) { + uint16_t sum[4] = {0}; + for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication + uint16_t sp = s[k * PARAMS_N + j]; + sum[0] += a_row[0 * PARAMS_N + j] * sp; // Go through four lines with same s + sum[1] += a_row[1 * PARAMS_N + j] * sp; + sum[2] += a_row[2 * PARAMS_N + j] * sp; + sum[3] += a_row[3 * PARAMS_N + j] * sp; + } + out[(i + 0)*PARAMS_NBAR + k] += sum[0]; + out[(i + 2)*PARAMS_NBAR + k] += sum[2]; + out[(i + 1)*PARAMS_NBAR + k] += sum[1]; + out[(i + 3)*PARAMS_NBAR + k] += sum[3]; + } + } + + #if defined(USE_AES128_FOR_A) + AES128_free_schedule(aes_key_schedule); + #endif + return 1; +} + + +int PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { + // Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left. 
+ // Inputs: s', e' (N_BAR x N) + // Output: out = s'*A + e' (N_BAR x N) + int i, j, k, kk; + + for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) { + *((uint32_t *)&out[i]) = *((uint32_t *)&e[i]); + } + + #if defined(USE_AES128_FOR_A) + uint16_t a_cols[PARAMS_N * PARAMS_STRIPE_STEP] = {0}; + uint16_t a_cols_t[PARAMS_N * PARAMS_STRIPE_STEP] = {0}; + uint16_t a_cols_temp[PARAMS_N * PARAMS_STRIPE_STEP] = {0}; + #if !defined(USE_OPENSSL) + uint8_t aes_key_schedule[16 * 11]; + AES128_load_schedule(seed_A, aes_key_schedule); + #else + EVP_CIPHER_CTX *aes_key_schedule; + int len; + if (!(aes_key_schedule = EVP_CIPHER_CTX_new())) { + handleErrors(); + } + if (1 != EVP_EncryptInit_ex(aes_key_schedule, EVP_aes_128_ecb(), NULL, seed_A, NULL)) { + handleErrors(); + } + #endif + + for (i = 0, j = 0; i < PARAMS_N; i++, j += PARAMS_STRIPE_STEP) { + a_cols_temp[j] = i; // Loading values in the little-endian order + } + + for (kk = 0; kk < PARAMS_N; kk += PARAMS_STRIPE_STEP) { // Go through A's columns, 8 (== PARAMS_STRIPE_STEP) columns at a time. + for (i = 0; i < (PARAMS_N * PARAMS_STRIPE_STEP); i += PARAMS_STRIPE_STEP) { + a_cols_temp[i + 1] = kk; // Loading values in the little-endian order + } + + #if !defined(USE_OPENSSL) + AES128_ECB_enc_sch((uint8_t *)a_cols_temp, PARAMS_N * PARAMS_STRIPE_STEP * sizeof(int16_t), aes_key_schedule, (uint8_t *)a_cols); + #else + if (1 != EVP_EncryptUpdate(aes_key_schedule, (uint8_t *)a_cols, &len, (uint8_t *)a_cols_temp, PARAMS_N * PARAMS_STRIPE_STEP * sizeof(int16_t))) { + handleErrors(); + } + #endif + + for (i = 0; i < PARAMS_N; i++) { // Transpose a_cols to have access to it in the column-major order. 
+ for (k = 0; k < PARAMS_STRIPE_STEP; k++) { + a_cols_t[k * PARAMS_N + i] = a_cols[i * PARAMS_STRIPE_STEP + k]; + } + } + + for (i = 0; i < PARAMS_NBAR; i++) { + for (k = 0; k < PARAMS_STRIPE_STEP; k += PARAMS_PARALLEL) { + uint16_t sum[PARAMS_PARALLEL] = {0}; + for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication + uint16_t sp = s[i * PARAMS_N + j]; + sum[0] += sp * a_cols_t[(k + 0) * PARAMS_N + j]; + sum[1] += sp * a_cols_t[(k + 1) * PARAMS_N + j]; + sum[2] += sp * a_cols_t[(k + 2) * PARAMS_N + j]; + sum[3] += sp * a_cols_t[(k + 3) * PARAMS_N + j]; + } + out[i * PARAMS_N + kk + k + 0] += sum[0]; + out[i * PARAMS_N + kk + k + 2] += sum[2]; + out[i * PARAMS_N + kk + k + 1] += sum[1]; + out[i * PARAMS_N + kk + k + 3] += sum[3]; + } + } + } + AES128_free_schedule(aes_key_schedule); + + #elif defined (USE_SHAKE128_FOR_A) // SHAKE128 + int t = 0; + uint16_t a_cols[4 * PARAMS_N] = {0}; + uint8_t seed_A_separated[2 + BYTES_SEED_A]; + uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated; + memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A); + for (kk = 0; kk < PARAMS_N; kk += 4) { + seed_A_origin[0] = (uint16_t) (kk + 0); + shake128((unsigned char *)(a_cols + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + seed_A_origin[0] = (uint16_t) (kk + 1); + shake128((unsigned char *)(a_cols + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + seed_A_origin[0] = (uint16_t) (kk + 2); + shake128((unsigned char *)(a_cols + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + seed_A_origin[0] = (uint16_t) (kk + 3); + shake128((unsigned char *)(a_cols + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A); + + for (i = 0; i < PARAMS_NBAR; i++) { + uint16_t sum[PARAMS_N] = {0}; + for (j = 0; j < 4; j++) { + uint16_t sp = s[i * PARAMS_N + kk + j]; + for (k = 0; k < PARAMS_N; k++) { // Matrix-vector multiplication + sum[k] += 
sp * a_cols[(t + j) * PARAMS_N + k]; + } + } + for (k = 0; k < PARAMS_N; k++) { + out[i * PARAMS_N + k] += sum[k]; + } + } + } + #endif + + return 1; +} diff --git a/crypto_kem/frodokem640shake/opt/noise.c b/crypto_kem/frodokem640shake/opt/noise.c new file mode 100644 index 00000000..170a944b --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/noise.c @@ -0,0 +1,35 @@ +/******************************************************************************************** +* FrodoKEM: Learning with Errors Key Encapsulation +* +* Abstract: noise sampling functions +*********************************************************************************************/ + +#include + +#include "api.h" +#include "common.h" +#include "params.h" + +static uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA; + +void PQCLEAN_FRODOKEM640SHAKE_OPT_sample_n(uint16_t *s, size_t n) { + // Fills vector s with n samples from the noise distribution which requires 16 bits to sample. + // The distribution is specified by its CDF. + // Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output. + size_t i; + unsigned int j; + + for (i = 0; i < n; ++i) { + uint16_t sample = 0; + uint16_t prnd = s[i] >> 1; // Drop the least significant bit + uint16_t sign = s[i] & 0x1; // Pick the least significant bit + + // No need to compare with the last value. + for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) { + // Constant time comparison: 1 if CDF_TABLE[j] < s, 0 otherwise. Uses the fact that CDF_TABLE[j] and s fit in 15 bits. 
+ sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15; + } + // Assuming that sign is either 0 or 1, flips sample iff sign = 1 + s[i] = ((-sign) ^ sample) + sign; + } +} diff --git a/crypto_kem/frodokem640shake/opt/params.h b/crypto_kem/frodokem640shake/opt/params.h new file mode 100644 index 00000000..dcdad3d3 --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/params.h @@ -0,0 +1,27 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_SECRETKEYBYTES +#define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_PUBLICKEYBYTES +#define CRYPTO_BYTES PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_BYTES +#define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM640SHAKE_OPT_CRYPTO_CIPHERTEXTBYTES + +#define PARAMS_N 640 +#define PARAMS_NBAR 8 +#define PARAMS_LOGQ 15 +#define PARAMS_Q (1 << PARAMS_LOGQ) +#define PARAMS_EXTRACTED_BITS 2 +#define PARAMS_STRIPE_STEP 8 +#define PARAMS_PARALLEL 4 +#define BYTES_SEED_A 16 +#define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8) +#define BYTES_PKHASH CRYPTO_BYTES + +// Selecting SHAKE XOF function for the KEM and noise sampling +#define shake shake128 + +// CDF table +#define CDF_TABLE_DATA {4643, 13363, 20579, 25843, 29227, 31145, 32103, 32525, 32689, 32745, 32762, 32766, 32767} +#define CDF_TABLE_LEN 13 + +#endif diff --git a/crypto_kem/frodokem640shake/opt/util.c b/crypto_kem/frodokem640shake/opt/util.c new file mode 100644 index 00000000..ab2f74c9 --- /dev/null +++ b/crypto_kem/frodokem640shake/opt/util.c @@ -0,0 +1,235 @@ +/******************************************************************************************** +* FrodoKEM: Learning with Errors Key Encapsulation +* +* Abstract: additional functions for FrodoKEM +*********************************************************************************************/ + +#include +#include + +#include "api.h" +#include "common.h" +#include "params.h" + +#define min(x, y) (((x) < (y)) ? 
(x) : (y))
+
+uint16_t PQCLEAN_FRODOKEM640SHAKE_OPT_LE_TO_UINT16(uint16_t n) {
+    // Read the two bytes of n in memory order as a little-endian 16-bit value:
+    // byte 0 is the low byte, byte 1 is the high byte.
+    return (((uint8_t *) &n)[0] | (((uint8_t *) &n)[1] << 8));
+}
+
+uint16_t PQCLEAN_FRODOKEM640SHAKE_OPT_UINT16_TO_LE(uint16_t n) {
+    // Return a uint16_t whose in-memory bytes are the little-endian encoding of n:
+    // the low byte of n is stored first, the high byte second.
+    uint16_t y;
+    uint8_t *z = (uint8_t *) &y;
+    z[0] = n & 0xFF;
+    z[1] = (n & 0xFF00) >> 8;
+    return y;
+}
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
+    // Multiply by s on the right
+    // Inputs: b (N_BAR x N), s (N x N_BAR)
+    // Output: out = b*s (N_BAR x N_BAR), every entry reduced to PARAMS_LOGQ bits
+    // Note: s is read as s[j * PARAMS_N + k], i.e. the PARAMS_N entries that are
+    // combined with row i of b are stored contiguously.
+    int i, j, k;
+
+    for (i = 0; i < PARAMS_NBAR; i++) {
+        for (j = 0; j < PARAMS_NBAR; j++) {
+            out[i * PARAMS_NBAR + j] = 0;
+            for (k = 0; k < PARAMS_N; k++) {
+                // Multiply in uint32_t: two uint16_t operands would be promoted to signed int,
+                // and a product above INT_MAX is undefined behaviour. Only the low 16 bits matter.
+                out[i * PARAMS_NBAR + j] += (uint16_t) ((uint32_t) b[i * PARAMS_N + k] * (uint32_t) s[j * PARAMS_N + k]);
+            }
+            out[i * PARAMS_NBAR + j] = (uint32_t)(out[i * PARAMS_NBAR + j]) & ((1 << PARAMS_LOGQ) - 1);
+        }
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
+    // Multiply by s on the left
+    // Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR)
+    // Output: out = s*b + e (N_BAR x N_BAR), every entry reduced to PARAMS_LOGQ bits
+    int i, j, k;
+
+    for (k = 0; k < PARAMS_NBAR; k++) {
+        for (i = 0; i < PARAMS_NBAR; i++) {
+            out[k * PARAMS_NBAR + i] = e[k * PARAMS_NBAR + i];
+            for (j = 0; j < PARAMS_N; j++) {
+                // Multiply in uint32_t: two uint16_t operands would be promoted to signed int,
+                // and a product above INT_MAX is undefined behaviour. Only the low 16 bits matter.
+                out[k * PARAMS_NBAR + i] += (uint16_t) ((uint32_t) s[k * PARAMS_N + j] * (uint32_t) b[j * PARAMS_NBAR + i]);
+            }
+            out[k * PARAMS_NBAR + i] = (uint32_t)(out[k * PARAMS_NBAR + i]) & ((1 << PARAMS_LOGQ) - 1);
+        }
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
+    // Add a and b
+    // Inputs: a, b (N_BAR x N_BAR)
+    // Output: out = a + b, every entry reduced to PARAMS_LOGQ bits
+
+    for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
+        out[i] = (a[i] + b[i]) & ((1 << PARAMS_LOGQ) - 1);
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
+    // Subtract a and b
+    // Inputs: a, b (N_BAR x N_BAR)
+    // Output: out = a - b, every entry reduced to PARAMS_LOGQ bits
+
+    for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
+        out[i] = (a[i] - b[i]) & ((1 << PARAMS_LOGQ) - 1);
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_key_encode(uint16_t *out, const uint16_t *in) {
+    // Encoding
+    // The input is read as a byte string. It is consumed in groups of PARAMS_EXTRACTED_BITS bytes;
+    // each group yields 8 output words, and every word carries PARAMS_EXTRACTED_BITS input bits
+    // shifted into the top of a PARAMS_LOGQ-bit value.
+    // Output: PARAMS_NBAR * PARAMS_NBAR words written to out.
+    unsigned int i, j, npieces_word = 8;
+    unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
+    uint64_t temp, mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
+    const uint8_t *bytes = (const uint8_t *)in;  // byte view of the input; keeps the const qualifier
+    uint16_t *pos = out;
+
+    for (i = 0; i < nwords; i++) {
+        // Gather PARAMS_EXTRACTED_BITS bytes into temp, first byte in the lowest position
+        temp = 0;
+        for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
+            temp |= ((uint64_t)bytes[i * PARAMS_EXTRACTED_BITS + j]) << (8 * j);
+        }
+        // Peel off PARAMS_EXTRACTED_BITS bits at a time, lowest bits first
+        for (j = 0; j < npieces_word; j++) {
+            *pos = (uint16_t)((temp & mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
+            temp >>= PARAMS_EXTRACTED_BITS;
+            pos++;
+        }
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_key_decode(uint16_t *out, const uint16_t *in) {
+    // Decoding
+    // Inverse of key_encode: every input word is rounded to its top PARAMS_EXTRACTED_BITS bits,
+    // and the results of 8 consecutive words are written to out as PARAMS_EXTRACTED_BITS bytes.
+    unsigned int i, j, index = 0, npieces_word = 8;
+    unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
+    uint16_t temp, maskex = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1, maskq = ((uint16_t)1 << PARAMS_LOGQ) - 1;
+    uint8_t *pos = (uint8_t *)out;
+    uint64_t templong;
+
+    for (i = 0; i < nwords; i++) {
+        templong = 0;
+        for (j = 0; j < npieces_word; j++) {  // temp = floor(in*2^{-(PARAMS_LOGQ - PARAMS_EXTRACTED_BITS)}+0.5)
+            temp = ((in[index] & maskq) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
+            templong |= ((uint64_t)(temp & maskex)) << (PARAMS_EXTRACTED_BITS * j);
+            index++;
+        }
+        // Store the collected bits byte by byte, lowest byte first
+        for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
+            pos[i * PARAMS_EXTRACTED_BITS + j] = (templong >> (8 * j)) & 0xFF;
+        }
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
+    // Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
+    // If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
+    // Bits are emitted most-significant first: the highest of the lsb bits of in[0] lands in the
+    // top bit of out[0]. The output is zeroed first, so a partially filled last byte is zero-padded.
+    memset(out, 0, outlen);
+
+    size_t i = 0;            // whole bytes already filled in
+    size_t j = 0;            // whole uint16_t already copied
+    uint16_t w = 0;          // the leftover, not yet copied
+    uint8_t bits = 0;        // the number of lsb in w
+
+    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
+        /*
+        in: |        |        |********|********|
+                              ^
+                              j
+        w : |   ****|
+                ^
+               bits
+        out:|**|**|**|**|**|**|**|**|* |
+                                    ^^
+                                    ib
+        */
+        uint8_t b = 0;  // bits in out[i] already filled in
+        while (b < 8) {
+            int nbits = min(8 - b, bits);
+            uint16_t mask = (1 << nbits) - 1;
+            uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask);  // the bits to copy from w to out
+            out[i] = out[i] + (t << (8 - b - nbits));
+            b += (uint8_t) nbits;
+            bits -= (uint8_t) nbits;
+            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging
+
+            if (bits == 0) {
+                if (j < inlen) {
+                    w = in[j];
+                    bits = lsb;
+                    j++;
+                } else {
+                    break;  // the input vector is exhausted
+                }
+            }
+        }
+        if (b == 8) {  // out[i] is filled in
+            i++;
+        }
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
+    // Unpack the input char vector into a uint16_t output vector, copying lsb bits
+    // for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
+    // Bits are consumed most-significant first, mirroring the bit order produced by pack.
+    memset(out, 0, outlen * sizeof(uint16_t));
+
+    size_t i = 0;            // whole uint16_t already filled in
+    size_t j = 0;            // whole bytes already copied
+    uint8_t w = 0;           // the leftover, not yet copied
+    uint8_t bits = 0;        // the number of lsb bits of w
+
+    while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
+        /*
+        in: |  |  |  |  |  |  |**|**|...
+                              ^
+                              j
+        w : | *|
+              ^
+              bits
+        out:|   *****|   *****|   ***  |        |...
+                      ^   ^
+                      i   b
+        */
+        uint8_t b = 0;  // bits in out[i] already filled in
+        while (b < lsb) {
+            int nbits = min(lsb - b, bits);
+            uint16_t mask = (1 << nbits) - 1;
+            uint8_t t = (w >> (bits - nbits)) & mask;  // the bits to copy from w to out
+            out[i] = out[i] + (t << (lsb - b - nbits));
+            b += (uint8_t) nbits;
+            bits -= (uint8_t) nbits;
+            w &= ~(mask << bits);  // not strictly necessary; mostly for debugging
+
+            if (bits == 0) {
+                if (j < inlen) {
+                    w = in[j];
+                    bits = 8;
+                    j++;
+                } else {
+                    break;  // the input vector is exhausted
+                }
+            }
+        }
+        if (b == lsb) {  // out[i] is filled in
+            i++;
+        }
+    }
+}
+
+
+void PQCLEAN_FRODOKEM640SHAKE_OPT_clear_bytes(uint8_t *mem, size_t n) {
+    // Clear 8-bit bytes from memory. "n" indicates the number of bytes to be zeroed.
+    // This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing.
+    volatile uint8_t *v = mem;
+
+    for (size_t i = 0; i < n; i++) {
+        v[i] = 0;
+    }
+}