Browse Source

Remove not needed code

pull/24/head
Henry Case 2 years ago
parent
commit
75368fee9d
100 changed files with 16 additions and 37256 deletions
  1. +1
    -24
      CMakeLists.txt
  2. +15
    -14
      README.md
  3. +0
    -13
      public/pqc/pqc.h
  4. +0
    -23
      src/capi/schemes.h
  5. +0
    -10
      src/kem/frodo/frodokem1344shake/clean/CMakeLists.txt
  6. +0
    -20
      src/kem/frodo/frodokem1344shake/clean/api.h
  7. +0
    -21
      src/kem/frodo/frodokem1344shake/clean/common.h
  8. +0
    -237
      src/kem/frodo/frodokem1344shake/clean/kem.c
  9. +0
    -108
      src/kem/frodo/frodokem1344shake/clean/matrix_shake.c
  10. +0
    -35
      src/kem/frodo/frodokem1344shake/clean/noise.c
  11. +0
    -27
      src/kem/frodo/frodokem1344shake/clean/params.h
  12. +0
    -264
      src/kem/frodo/frodokem1344shake/clean/util.c
  13. +0
    -10
      src/kem/frodo/frodokem640shake/clean/CMakeLists.txt
  14. +0
    -20
      src/kem/frodo/frodokem640shake/clean/api.h
  15. +0
    -21
      src/kem/frodo/frodokem640shake/clean/common.h
  16. +0
    -237
      src/kem/frodo/frodokem640shake/clean/kem.c
  17. +0
    -108
      src/kem/frodo/frodokem640shake/clean/matrix_shake.c
  18. +0
    -35
      src/kem/frodo/frodokem640shake/clean/noise.c
  19. +0
    -27
      src/kem/frodo/frodokem640shake/clean/params.h
  20. +0
    -264
      src/kem/frodo/frodokem640shake/clean/util.c
  21. +0
    -10
      src/kem/frodo/frodokem976shake/clean/CMakeLists.txt
  22. +0
    -20
      src/kem/frodo/frodokem976shake/clean/api.h
  23. +0
    -21
      src/kem/frodo/frodokem976shake/clean/common.h
  24. +0
    -237
      src/kem/frodo/frodokem976shake/clean/kem.c
  25. +0
    -108
      src/kem/frodo/frodokem976shake/clean/matrix_shake.c
  26. +0
    -35
      src/kem/frodo/frodokem976shake/clean/noise.c
  27. +0
    -27
      src/kem/frodo/frodokem976shake/clean/params.h
  28. +0
    -264
      src/kem/frodo/frodokem976shake/clean/util.c
  29. +0
    -32
      src/kem/ntru/ntruhps2048509/avx2/CMakeLists.txt
  30. +0
    -19
      src/kem/ntru/ntruhps2048509/avx2/api.h
  31. +0
    -11
      src/kem/ntru/ntruhps2048509/avx2/cmov.c
  32. +0
    -10
      src/kem/ntru/ntruhps2048509/avx2/cmov.h
  33. +0
    -1218
      src/kem/ntru/ntruhps2048509/avx2/crypto_sort_int32.c
  34. +0
    -11
      src/kem/ntru/ntruhps2048509/avx2/crypto_sort_int32.h
  35. +0
    -63
      src/kem/ntru/ntruhps2048509/avx2/kem.c
  36. +0
    -183
      src/kem/ntru/ntruhps2048509/avx2/owcpa.c
  37. +0
    -19
      src/kem/ntru/ntruhps2048509/avx2/owcpa.h
  38. +0
    -46
      src/kem/ntru/ntruhps2048509/avx2/pack3.c
  39. +0
    -93
      src/kem/ntru/ntruhps2048509/avx2/packq.c
  40. +0
    -37
      src/kem/ntru/ntruhps2048509/avx2/params.h
  41. +0
    -75
      src/kem/ntru/ntruhps2048509/avx2/poly.c
  42. +0
    -41
      src/kem/ntru/ntruhps2048509/avx2/poly.h
  43. +0
    -11
      src/kem/ntru/ntruhps2048509/avx2/poly_lift.c
  44. +0
    -676
      src/kem/ntru/ntruhps2048509/avx2/poly_mod_3_Phi_n.s
  45. +0
    -80
      src/kem/ntru/ntruhps2048509/avx2/poly_mod_q_Phi_n.s
  46. +0
    -80
      src/kem/ntru/ntruhps2048509/avx2/poly_r2_inv.c
  47. +0
    -20
      src/kem/ntru/ntruhps2048509/avx2/poly_r2_inv.h
  48. +0
    -285
      src/kem/ntru/ntruhps2048509/avx2/poly_r2_mul.s
  49. +0
    -5520
      src/kem/ntru/ntruhps2048509/avx2/poly_rq_mul.s
  50. +0
    -840
      src/kem/ntru/ntruhps2048509/avx2/poly_rq_to_s3.s
  51. +0
    -463
      src/kem/ntru/ntruhps2048509/avx2/poly_s3_inv.c
  52. +0
    -45
      src/kem/ntru/ntruhps2048509/avx2/sample.c
  53. +0
    -17
      src/kem/ntru/ntruhps2048509/avx2/sample.h
  54. +0
    -21
      src/kem/ntru/ntruhps2048509/avx2/sample_iid.c
  55. +0
    -2854
      src/kem/ntru/ntruhps2048509/avx2/square_126_509_shufbytes.s
  56. +0
    -5125
      src/kem/ntru/ntruhps2048509/avx2/square_15_509_shufbytes.s
  57. +0
    -109
      src/kem/ntru/ntruhps2048509/avx2/square_1_509_patience.s
  58. +0
    -3992
      src/kem/ntru/ntruhps2048509/avx2/square_252_509_shufbytes.s
  59. +0
    -4316
      src/kem/ntru/ntruhps2048509/avx2/square_30_509_shufbytes.s
  60. +0
    -272
      src/kem/ntru/ntruhps2048509/avx2/square_3_509_patience.s
  61. +0
    -4186
      src/kem/ntru/ntruhps2048509/avx2/square_63_509_shufbytes.s
  62. +0
    -296
      src/kem/ntru/ntruhps2048509/avx2/square_6_509_patience.s
  63. +0
    -784
      src/kem/ntru/ntruhps2048509/avx2/vec32_sample_iid.s
  64. +0
    -20
      src/kem/ntru/ntruhps2048509/clean/CMakeLists.txt
  65. +0
    -19
      src/kem/ntru/ntruhps2048509/clean/api.h
  66. +0
    -11
      src/kem/ntru/ntruhps2048509/clean/cmov.c
  67. +0
    -10
      src/kem/ntru/ntruhps2048509/clean/cmov.h
  68. +0
    -86
      src/kem/ntru/ntruhps2048509/clean/crypto_sort_int32.c
  69. +0
    -11
      src/kem/ntru/ntruhps2048509/clean/crypto_sort_int32.h
  70. +0
    -63
      src/kem/ntru/ntruhps2048509/clean/kem.c
  71. +0
    -183
      src/kem/ntru/ntruhps2048509/clean/owcpa.c
  72. +0
    -19
      src/kem/ntru/ntruhps2048509/clean/owcpa.h
  73. +0
    -46
      src/kem/ntru/ntruhps2048509/clean/pack3.c
  74. +0
    -93
      src/kem/ntru/ntruhps2048509/clean/packq.c
  75. +0
    -37
      src/kem/ntru/ntruhps2048509/clean/params.h
  76. +0
    -75
      src/kem/ntru/ntruhps2048509/clean/poly.c
  77. +0
    -39
      src/kem/ntru/ntruhps2048509/clean/poly.h
  78. +0
    -11
      src/kem/ntru/ntruhps2048509/clean/poly_lift.c
  79. +0
    -53
      src/kem/ntru/ntruhps2048509/clean/poly_mod.c
  80. +0
    -69
      src/kem/ntru/ntruhps2048509/clean/poly_r2_inv.c
  81. +0
    -284
      src/kem/ntru/ntruhps2048509/clean/poly_rq_mul.c
  82. +0
    -78
      src/kem/ntru/ntruhps2048509/clean/poly_s3_inv.c
  83. +0
    -45
      src/kem/ntru/ntruhps2048509/clean/sample.c
  84. +0
    -17
      src/kem/ntru/ntruhps2048509/clean/sample.h
  85. +0
    -26
      src/kem/ntru/ntruhps2048509/clean/sample_iid.c
  86. +0
    -34
      src/kem/ntru/ntruhps2048677/avx2/CMakeLists.txt
  87. +0
    -19
      src/kem/ntru/ntruhps2048677/avx2/api.h
  88. +0
    -11
      src/kem/ntru/ntruhps2048677/avx2/cmov.c
  89. +0
    -10
      src/kem/ntru/ntruhps2048677/avx2/cmov.h
  90. +0
    -1218
      src/kem/ntru/ntruhps2048677/avx2/crypto_sort_int32.c
  91. +0
    -11
      src/kem/ntru/ntruhps2048677/avx2/crypto_sort_int32.h
  92. +0
    -63
      src/kem/ntru/ntruhps2048677/avx2/kem.c
  93. +0
    -183
      src/kem/ntru/ntruhps2048677/avx2/owcpa.c
  94. +0
    -19
      src/kem/ntru/ntruhps2048677/avx2/owcpa.h
  95. +0
    -46
      src/kem/ntru/ntruhps2048677/avx2/pack3.c
  96. +0
    -93
      src/kem/ntru/ntruhps2048677/avx2/packq.c
  97. +0
    -37
      src/kem/ntru/ntruhps2048677/avx2/params.h
  98. +0
    -75
      src/kem/ntru/ntruhps2048677/avx2/poly.c
  99. +0
    -41
      src/kem/ntru/ntruhps2048677/avx2/poly.h
  100. +0
    -11
      src/kem/ntru/ntruhps2048677/avx2/poly_lift.c

+ 1
- 24
CMakeLists.txt View File

@@ -167,7 +167,7 @@ FetchContent_Declare(
gbench
SOURCE_DIR ${PROJECT_SOURCE_DIR}/3rd/gbench
GIT_REPOSITORY https://github.com/kriskwiatkowski/benchmark.git
GIT_TAG 49862ab56b6b7c3afd87b80bd5d787ed78ce3b96
GIT_TAG hdc/release_crypto
)
FetchContent_Populate(gbench)

@@ -236,19 +236,6 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-192f-robust/clean)
add_subdirectory(src/kem/kyber/kyber512/clean)
add_subdirectory(src/kem/kyber/kyber768/clean)
add_subdirectory(src/kem/kyber/kyber1024/clean)
add_subdirectory(src/kem/saber/lightsaber/clean)
add_subdirectory(src/kem/saber/firesaber/clean)
add_subdirectory(src/kem/saber/saber/clean)
add_subdirectory(src/kem/frodo/frodokem640shake/clean)
add_subdirectory(src/kem/frodo/frodokem976shake/clean)
add_subdirectory(src/kem/frodo/frodokem1344shake/clean)
add_subdirectory(src/kem/ntru/ntruhps4096821/clean)
add_subdirectory(src/kem/ntru/ntruhps2048509/clean)
add_subdirectory(src/kem/ntru/ntruhrss701/clean)
add_subdirectory(src/kem/ntru/ntruhps2048677/clean)
add_subdirectory(src/kem/ntru_prime/ntrulpr761/clean)
add_subdirectory(src/kem/ntru_prime/ntrulpr653/clean)
add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean)
@@ -300,16 +287,6 @@ add_subdirectory(src/sign/sphincs/sphincs-sha256-256s-robust/avx2)
add_subdirectory(src/kem/kyber/kyber512/avx2)
add_subdirectory(src/kem/kyber/kyber768/avx2)
add_subdirectory(src/kem/kyber/kyber1024/avx2)
add_subdirectory(src/kem/saber/lightsaber/avx2)
add_subdirectory(src/kem/saber/firesaber/avx2)
add_subdirectory(src/kem/saber/saber/avx2)
add_subdirectory(src/kem/ntru/ntruhps4096821/avx2)
add_subdirectory(src/kem/ntru/ntruhps2048509/avx2)
add_subdirectory(src/kem/ntru/ntruhrss701/avx2)
add_subdirectory(src/kem/ntru/ntruhps2048677/avx2)
add_subdirectory(src/kem/ntru_prime/ntrulpr761/avx2)
add_subdirectory(src/kem/ntru_prime/ntrulpr653/avx2)
add_subdirectory(src/kem/ntru_prime/ntrulpr857/avx2)
add_subdirectory(src/kem/hqc/hqc-rmrs-128/avx2)
add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2)
add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2)


+ 15
- 14
README.md View File

@@ -8,20 +8,21 @@ Users shouldn't expect any level of security provided by this code. The library

## Supported schemes

| Name | NIST Round | x86 optimized |
|--------------------------|------------|---------------|
| Kyber | 3 | x |
| SABER | 3 | x |
| FrodoKEM | 3 | |
| Dilithium | 3 | x |
| Falcon | 3 | |
| SPHINCS+ SHA256/SHAKE256 | 3 | x |
| NTRU | 3 | x |
| NTRU Prime | 3 | x |
| HQC-RMRS | 3 | x |
| Rainbow | 3 | |
| SIKE/p434 | 3 | x |
| McEliece | 3 | |
| Name | x86 optimized |
|--------------------------|------------|
| Kyber | x |
| Dilithium | x |
| Falcon | |
| SPHINCS+ SHA256/SHAKE256 | x |


## Round 4 algorithms

| Name | x86 optimized |
|--------------------------|------------|
| HQC-RMRS | x |
| SIKE/p434 | x |
| McEliece | |

## Building



+ 0
- 13
public/pqc/pqc.h View File

@@ -44,22 +44,9 @@ extern "C" {
// Defines supported kem algorithm list. The resulting
// ID of an algorithm is PQC_ALG_KEM_(NAME_AS_BELOW)
#define PQC_SUPPORTED_KEMS(_)\
_(FRODOKEM640SHAKE) \
_(FRODOKEM976SHAKE) \
_(FRODOKEM1344SHAKE) \
_(KYBER512) \
_(KYBER768) \
_(KYBER1024) \
_(NTRUHPS2048509) \
_(NTRUHPS4096821) \
_(NTRUHRSS701) \
_(NTRUHPS2048677) \
_(NTRULPR761) \
_(NTRULPR653) \
_(NTRULPR857) \
_(LIGHTSABER) \
_(SABER) \
_(FIRESABER) \
_(HQCRMRS128) \
_(HQCRMRS192) \
_(HQCRMRS256) \


+ 0
- 23
src/capi/schemes.h View File

@@ -57,20 +57,6 @@
#include "sign/dilithium/dilithium5/clean/api.h"
#include "sign/dilithium/dilithium5/avx2/api.h"
#include "sign/falcon/api.h"
#include "kem/ntru/ntruhps4096821/clean/api.h"
#include "kem/ntru/ntruhps4096821/avx2/api.h"
#include "kem/ntru/ntruhps2048509/clean/api.h"
#include "kem/ntru/ntruhps2048509/avx2/api.h"
#include "kem/ntru/ntruhrss701/clean/api.h"
#include "kem/ntru/ntruhrss701/avx2/api.h"
#include "kem/ntru/ntruhps2048677/clean/api.h"
#include "kem/ntru/ntruhps2048677/avx2/api.h"
#include "kem/ntru_prime/ntrulpr761/clean/api.h"
#include "kem/ntru_prime/ntrulpr761/avx2/api.h"
#include "kem/ntru_prime/ntrulpr653/clean/api.h"
#include "kem/ntru_prime/ntrulpr653/avx2/api.h"
#include "kem/ntru_prime/ntrulpr857/clean/api.h"
#include "kem/ntru_prime/ntrulpr857/avx2/api.h"
#include "kem/kyber/kyber768/clean/api.h"
#include "kem/kyber/kyber768/avx2/api.h"
#include "kem/kyber/kyber1024/clean/api.h"
@@ -97,15 +83,6 @@
#include "kem/mceliece/mceliece6960119/clean/api.h"
#include "kem/mceliece/mceliece348864/avx/api.h"
#include "kem/mceliece/mceliece348864/clean/api.h"
#include "kem/frodo/frodokem976shake/clean/api.h"
#include "kem/frodo/frodokem1344shake/clean/api.h"
#include "kem/frodo/frodokem640shake/clean/api.h"
#include "kem/saber/lightsaber/clean/api.h"
#include "kem/saber/lightsaber/avx2/api.h"
#include "kem/saber/firesaber/clean/api.h"
#include "kem/saber/firesaber/avx2/api.h"
#include "kem/saber/saber/clean/api.h"
#include "kem/saber/saber/avx2/api.h"
#include "kem/hqc/hqc-rmrs-128/clean/api.h"
#include "kem/hqc/hqc-rmrs-192/clean/api.h"
#include "kem/hqc/hqc-rmrs-256/clean/api.h"


+ 0
- 10
src/kem/frodo/frodokem1344shake/clean/CMakeLists.txt View File

@@ -1,10 +0,0 @@
set(
SRC_CLEAN_FRODOKEM1344SHAKE
kem.c
matrix_shake.c
noise.c
util.c
)

define_kem_alg(frodo1344shake_clean
PQCLEAN_FRODOKEM1344SHAKE_OPT "${SRC_CLEAN_FRODOKEM1344SHAKE}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 20
src/kem/frodo/frodokem1344shake/clean/api.h View File

@@ -1,20 +0,0 @@
#ifndef PQCLEAN_FRODOKEM1344SHAKE_CLEAN_API_H
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES 43088 // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES 21520 // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_BYTES 32
#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES 21632 // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

#define PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_ALGNAME "FrodoKEM-1344-SHAKE"

int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

#endif

+ 0
- 21
src/kem/frodo/frodokem1344shake/clean/common.h View File

@@ -1,21 +0,0 @@
#ifndef COMMON_H
#define COMMON_H

int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(uint16_t *s, size_t n);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
int8_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n);
uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(uint16_t n);
uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(uint16_t n);

#endif

+ 0
- 237
src/kem/frodo/frodokem1344shake/clean/kem.c View File

@@ -1,237 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "fips202.h"
#include "randombytes.h"

#include "api.h"
#include "common.h"
#include "params.h"

int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
// FrodoKEM's key generation
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
// secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
uint8_t *pk_seedA = &pk[0];
uint8_t *pk_b = &pk[BYTES_SEED_A];
uint8_t *sk_s = &sk[0];
uint8_t *sk_pk = &sk[CRYPTO_BYTES];
uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0}; // contains secret data
uint16_t *E = &S[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A]; // contains secret data via randomness_s and randomness_seedSE
uint8_t *randomness_s = &randomness[0]; // contains secret data
uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data

// Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

// Generate S and E, and compute B = A*S + E. Generate A on-the-fly
shake_input_seedSE[0] = 0x5F;
memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
S[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(S[i]);
}
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_as_plus_e(B, S, E, pk);

// Encode the second part of the public key
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

// Add s, pk and S to the secret key
memcpy(sk_s, randomness_s, CRYPTO_BYTES);
memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
S[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(S[i]);
}
memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

// Add H(pk) to the secret key
shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

// Cleanup:
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
return 0;
}


int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
// FrodoKEM's key encapsulation
const uint8_t *pk_seedA = &pk[0];
const uint8_t *pk_b = &pk[BYTES_SEED_A];
uint8_t *ct_c1 = &ct[0];
uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via mu
uint8_t *pkh = &G2in[0];
uint8_t *mu = &G2in[BYTES_PKHASH]; // contains secret data
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data
uint8_t *seedSE = &G2out[0]; // contains secret data
uint8_t *k = &G2out[CRYPTO_BYTES]; // contains secret data
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k
uint8_t *Fin_ct = &Fin[0];
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data

// pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
randombytes(mu, BYTES_MU);
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

// Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
shake_input_seedSE[0] = 0x96;
memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
Sp[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(Sp[i]);
}
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

// Generate Epp, and compute V = Sp*B + Epp
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp);

// Encode mu, and compute C = V + enc(mu) (mod q)
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(C, (uint16_t *)mu);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(C, V, C);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

// Compute ss = F(ct||KK)
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
memcpy(Fin_k, k, CRYPTO_BYTES);
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

// Cleanup:
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(mu, BYTES_MU);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
return 0;
}


int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
// FrodoKEM's key decapsulation
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data
const uint8_t *ct_c1 = &ct[0];
const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
const uint8_t *sk_s = &sk[0];
const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
uint16_t S[PARAMS_N * PARAMS_NBAR]; // contains secret data
const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
const uint8_t *pk_seedA = &sk_pk[0];
const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via muprime
uint8_t *pkh = &G2in[0];
uint8_t *muprime = &G2in[BYTES_PKHASH]; // contains secret data
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data
uint8_t *seedSEprime = &G2out[0]; // contains secret data
uint8_t *kprime = &G2out[CRYPTO_BYTES]; // contains secret data
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k
uint8_t *Fin_ct = &Fin[0];
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data
uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES]; // contains secret data

for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
}

// Compute W = C - Bp*S (mod q), and decode the randomness mu
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_bs(W, Bp, S);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sub(W, C, W);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_decode((uint16_t *)muprime, W);

// Generate (seedSE' || k') = G_2(pkh || mu')
memcpy(pkh, sk_pkh, BYTES_PKHASH);
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

// Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
shake_input_seedSEprime[0] = 0x96;
memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
Sp[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(Sp[i]);
}
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

// Generate Epp, and compute W = Sp*B + Epp
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp);

// Encode mu, and compute CC = W + enc(mu') (mod q)
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(CC, (uint16_t *)muprime);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(CC, W, CC);

// Prepare input to F
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

// Reducing BBp modulo q
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
}

// If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
// Needs to avoid branching on secret data as per:
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
int8_t selector = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

// Cleanup:
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(muprime, BYTES_MU);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
return 0;
}

+ 0
- 108
src/kem/frodo/frodokem1344shake/clean/matrix_shake.c View File

@@ -1,108 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: matrix arithmetic functions used by the KEM
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "fips202.h"

#include "api.h"
#include "common.h"
#include "params.h"

int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
// Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
// Inputs: s, e (N x N_BAR)
// Output: out = A*s + e (N x N_BAR)
int j, k;
uint16_t i;
int16_t a_row[4 * PARAMS_N];

for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
}

uint8_t seed_A_separated[2 + BYTES_SEED_A];
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated;
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A);
for (i = 0; i < PARAMS_N; i += 4) {
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 0);
shake128((unsigned char *)(a_row + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 1);
shake128((unsigned char *)(a_row + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 2);
shake128((unsigned char *)(a_row + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(i + 3);
shake128((unsigned char *)(a_row + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
for (k = 0; k < 4 * PARAMS_N; k++) {
a_row[k] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(a_row[k]);
}
for (k = 0; k < PARAMS_NBAR; k++) {
uint16_t sum[4] = {0};
for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication
uint16_t sp = s[k * PARAMS_N + j];
sum[0] += a_row[0 * PARAMS_N + j] * sp; // Go through four lines with same s
sum[1] += a_row[1 * PARAMS_N + j] * sp;
sum[2] += a_row[2 * PARAMS_N + j] * sp;
sum[3] += a_row[3 * PARAMS_N + j] * sp;
}
out[(i + 0)*PARAMS_NBAR + k] += sum[0];
out[(i + 2)*PARAMS_NBAR + k] += sum[2];
out[(i + 1)*PARAMS_NBAR + k] += sum[1];
out[(i + 3)*PARAMS_NBAR + k] += sum[3];
}
}
return 1;
}




int PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
// Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left.
// Inputs: s', e' (N_BAR x N)
// Output: out = s'*A + e' (N_BAR x N)
int i, j;
uint16_t kk;
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
}

int t = 0;
uint16_t a_cols[4 * PARAMS_N];

int k;
uint8_t seed_A_separated[2 + BYTES_SEED_A];
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated;
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A);
for (kk = 0; kk < PARAMS_N; kk += 4) {
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 0);
shake128((unsigned char *)(a_cols + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 1);
shake128((unsigned char *)(a_cols + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 2);
shake128((unsigned char *)(a_cols + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(kk + 3);
shake128((unsigned char *)(a_cols + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
for (i = 0; i < 4 * PARAMS_N; i++) {
a_cols[i] = PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(a_cols[i]);
}

for (i = 0; i < PARAMS_NBAR; i++) {
uint16_t sum[PARAMS_N] = {0};
for (j = 0; j < 4; j++) {
uint16_t sp = s[i * PARAMS_N + kk + j];
for (k = 0; k < PARAMS_N; k++) { // Matrix-vector multiplication
sum[k] += (uint16_t)(sp * (uint32_t)a_cols[(t + j) * PARAMS_N + k]);
}
}
for (k = 0; k < PARAMS_N; k++) {
out[i * PARAMS_N + k] += sum[k];
}
}
}
return 1;
}

+ 0
- 35
src/kem/frodo/frodokem1344shake/clean/noise.c View File

@@ -1,35 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: noise sampling functions
*********************************************************************************************/

#include <stdint.h>

#include "api.h"
#include "common.h"
#include "params.h"

static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sample_n(uint16_t *s, size_t n) {
// Fills vector s with n samples from the noise distribution which requires 16 bits to sample.
// The distribution is specified by its CDF.
// Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output.
size_t i;
unsigned int j;

for (i = 0; i < n; ++i) {
uint16_t sample = 0;
uint16_t prnd = s[i] >> 1; // Drop the least significant bit
uint16_t sign = s[i] & 0x1; // Pick the least significant bit

// No need to compare with the last value.
for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) {
// Constant time comparison: 1 if CDF_TABLE[j] < s, 0 otherwise. Uses the fact that CDF_TABLE[j] and s fit in 15 bits.
sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15;
}
// Assuming that sign is either 0 or 1, flips sample iff sign = 1
s[i] = ((-sign) ^ sample) + sign;
}
}

+ 0
- 27
src/kem/frodo/frodokem1344shake/clean/params.h View File

@@ -1,27 +0,0 @@
#ifndef PARAMS_H
#define PARAMS_H

#define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES
#define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES
#define CRYPTO_BYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_BYTES
#define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM1344SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES

#define PARAMS_N 1344
#define PARAMS_NBAR 8
#define PARAMS_LOGQ 16
#define PARAMS_Q (1 << PARAMS_LOGQ)
#define PARAMS_EXTRACTED_BITS 4
#define PARAMS_STRIPE_STEP 8
#define PARAMS_PARALLEL 4
#define BYTES_SEED_A 16
#define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
#define BYTES_PKHASH CRYPTO_BYTES

// Selecting SHAKE XOF function for the KEM and noise sampling
#define shake shake256

// CDF table
#define CDF_TABLE_DATA {9142, 23462, 30338, 32361, 32725, 32765, 32767}
#define CDF_TABLE_LEN 7

#endif

+ 0
- 264
src/kem/frodo/frodokem1344shake/clean/util.c View File

@@ -1,264 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: additional functions for FrodoKEM
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "api.h"
#include "common.h"
#include "params.h"

static inline uint8_t min(uint8_t x, uint8_t y) {
if (x < y) {
return x;
}
return y;
}

uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_LE_TO_UINT16(uint16_t n) {
return (((uint8_t *) &n)[0] | (((uint8_t *) &n)[1] << 8));
}

uint16_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_UINT16_TO_LE(uint16_t n) {
uint16_t y;
uint8_t *z = (uint8_t *) &y;
z[0] = n & 0xFF;
z[1] = (n & 0xFF00) >> 8;
return y;
}

void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
// Multiply by s on the right
// Inputs: b (N_BAR x N), s (N x N_BAR)
// Output: out = b*s (N_BAR x N_BAR)
int i, j, k;

for (i = 0; i < PARAMS_NBAR; i++) {
for (j = 0; j < PARAMS_NBAR; j++) {
out[i * PARAMS_NBAR + j] = 0;
for (k = 0; k < PARAMS_N; k++) {
out[i * PARAMS_NBAR + j] += (uint16_t)(b[i * PARAMS_N + k] * (uint32_t)s[j * PARAMS_N + k]);
}
out[i * PARAMS_NBAR + j] = (uint32_t)(out[i * PARAMS_NBAR + j]) & ((1 << PARAMS_LOGQ) - 1);
}
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
// Multiply by s on the left
// Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR)
// Output: out = s*b + e (N_BAR x N_BAR)
int i, j, k;

for (k = 0; k < PARAMS_NBAR; k++) {
for (i = 0; i < PARAMS_NBAR; i++) {
out[k * PARAMS_NBAR + i] = e[k * PARAMS_NBAR + i];
for (j = 0; j < PARAMS_N; j++) {
out[k * PARAMS_NBAR + i] += (uint16_t)(s[k * PARAMS_N + j] * (uint32_t)b[j * PARAMS_NBAR + i]);
}
out[k * PARAMS_NBAR + i] = (uint32_t)(out[k * PARAMS_NBAR + i]) & ((1 << PARAMS_LOGQ) - 1);
}
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
// Add a and b
// Inputs: a, b (N_BAR x N_BAR)
// Output: c = a + b

for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
out[i] = (a[i] + b[i]) & ((1 << PARAMS_LOGQ) - 1);
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
// Subtract a and b
// Inputs: a, b (N_BAR x N_BAR)
// Output: c = a - b

for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
out[i] = (a[i] - b[i]) & ((1 << PARAMS_LOGQ) - 1);
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in) {
// Encoding
unsigned int i, j, npieces_word = 8;
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
uint64_t temp, mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
uint16_t *pos = out;

for (i = 0; i < nwords; i++) {
temp = 0;
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
temp |= ((uint64_t)((uint8_t *)in)[i * PARAMS_EXTRACTED_BITS + j]) << (8 * j);
}
for (j = 0; j < npieces_word; j++) {
*pos = (uint16_t)((temp & mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
temp >>= PARAMS_EXTRACTED_BITS;
pos++;
}
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in) {
// Decoding
unsigned int i, j, index = 0, npieces_word = 8;
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
uint16_t temp, maskex = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1, maskq = ((uint16_t)1 << PARAMS_LOGQ) - 1;
uint8_t *pos = (uint8_t *)out;
uint64_t templong;

for (i = 0; i < nwords; i++) {
templong = 0;
for (j = 0; j < npieces_word; j++) { // temp = floor(in*2^{-11}+0.5)
temp = ((in[index] & maskq) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
templong |= ((uint64_t)(temp & maskex)) << (PARAMS_EXTRACTED_BITS * j);
index++;
}
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
pos[i * PARAMS_EXTRACTED_BITS + j] = (templong >> (8 * j)) & 0xFF;
}
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
// Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
// If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
memset(out, 0, outlen);

size_t i = 0; // whole bytes already filled in
size_t j = 0; // whole uint16_t already copied
uint16_t w = 0; // the leftover, not yet copied
uint8_t bits = 0; // the number of lsb in w

while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
/*
in: | | |********|********|
^
j
w : | ****|
^
bits
out:|**|**|**|**|**|**|**|**|* |
^^
ib
*/
uint8_t b = 0; // bits in out[i] already filled in
while (b < 8) {
int nbits = min(8 - b, bits);
uint16_t mask = (1 << nbits) - 1;
uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask); // the bits to copy from w to out
out[i] = out[i] + (t << (8 - b - nbits));
b += (uint8_t) nbits;
bits -= (uint8_t) nbits;
w &= ~(mask << bits); // not strictly necessary; mostly for debugging

if (bits == 0) {
if (j < inlen) {
w = in[j];
bits = lsb;
j++;
} else {
break; // the input vector is exhausted
}
}
}
if (b == 8) { // out[i] is filled in
i++;
}
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
// Unpack the input char vector into a uint16_t output vector, copying lsb bits
// for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
memset(out, 0, outlen * sizeof(uint16_t));

size_t i = 0; // whole uint16_t already filled in
size_t j = 0; // whole bytes already copied
uint8_t w = 0; // the leftover, not yet copied
uint8_t bits = 0; // the number of lsb bits of w

while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
/*
in: | | | | | | |**|**|...
^
j
w : | *|
^
bits
out:| *****| *****| *** | |...
^ ^
i b
*/
uint8_t b = 0; // bits in out[i] already filled in
while (b < lsb) {
int nbits = min(lsb - b, bits);
uint16_t mask = (1 << nbits) - 1;
uint8_t t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out
out[i] = out[i] + (t << (lsb - b - nbits));
b += (uint8_t) nbits;
bits -= (uint8_t) nbits;
w &= ~(mask << bits); // not strictly necessary; mostly for debugging

if (bits == 0) {
if (j < inlen) {
w = in[j];
bits = 8;
j++;
} else {
break; // the input vector is exhausted
}
}
}
if (b == lsb) { // out[i] is filled in
i++;
}
}
}


int8_t PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
// Compare two arrays in constant time.
// Returns 0 if the byte arrays are equal, -1 otherwise.
uint16_t r = 0;

for (size_t i = 0; i < len; i++) {
r |= a[i] ^ b[i];
}

r = (-(int16_t)(r >> 1) | -(int16_t)(r & 1)) >> (8 * sizeof(uint16_t) -1);
return (int8_t)r;
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
// Select one of the two input arrays to be moved to r
// If (selector == 0) then load r with a, else if (selector == -1) load r with b

for (size_t i = 0; i < len; i++) {
r[i] = (~selector & a[i]) | (selector & b[i]);
}
}


void PQCLEAN_FRODOKEM1344SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n) {
// Clear 8-bit bytes from memory. "n" indicates the number of bytes to be zeroed.
// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing.
volatile uint8_t *v = mem;

for (size_t i = 0; i < n; i++) {
v[i] = 0;
}
}

+ 0
- 10
src/kem/frodo/frodokem640shake/clean/CMakeLists.txt View File

@@ -1,10 +0,0 @@
set(
SRC_CLEAN_FRODOKEM640SHAKE
kem.c
matrix_shake.c
noise.c
util.c
)

define_kem_alg(frodo640shake_clean
PQCLEAN_FRODOKEM640SHAKE_OPT "${SRC_CLEAN_FRODOKEM640SHAKE}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 20
src/kem/frodo/frodokem640shake/clean/api.h View File

@@ -1,20 +0,0 @@
#ifndef PQCLEAN_FRODOKEM640SHAKE_CLEAN_API_H
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES 19888 // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES 9616 // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_BYTES 16
#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES 9720 // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

#define PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_ALGNAME "FrodoKEM-640-SHAKE"

int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

#endif

+ 0
- 21
src/kem/frodo/frodokem640shake/clean/common.h View File

@@ -1,21 +0,0 @@
#ifndef COMMON_H
#define COMMON_H

int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(uint16_t *s, size_t n);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
int8_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
void PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n);
uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(uint16_t n);
uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(uint16_t n);

#endif

+ 0
- 237
src/kem/frodo/frodokem640shake/clean/kem.c View File

@@ -1,237 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "fips202.h"
#include "randombytes.h"

#include "api.h"
#include "common.h"
#include "params.h"

int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
// FrodoKEM's key generation
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
// secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
uint8_t *pk_seedA = &pk[0];
uint8_t *pk_b = &pk[BYTES_SEED_A];
uint8_t *sk_s = &sk[0];
uint8_t *sk_pk = &sk[CRYPTO_BYTES];
uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0}; // contains secret data
uint16_t *E = &S[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A]; // contains secret data via randomness_s and randomness_seedSE
uint8_t *randomness_s = &randomness[0]; // contains secret data
uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data

// Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

// Generate S and E, and compute B = A*S + E. Generate A on-the-fly
shake_input_seedSE[0] = 0x5F;
memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
S[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(S[i]);
}
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_as_plus_e(B, S, E, pk);

// Encode the second part of the public key
PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

// Add s, pk and S to the secret key
memcpy(sk_s, randomness_s, CRYPTO_BYTES);
memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
S[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(S[i]);
}
memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

// Add H(pk) to the secret key
shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

// Cleanup:
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
return 0;
}


int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
// FrodoKEM's key encapsulation
const uint8_t *pk_seedA = &pk[0];
const uint8_t *pk_b = &pk[BYTES_SEED_A];
uint8_t *ct_c1 = &ct[0];
uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via mu
uint8_t *pkh = &G2in[0];
uint8_t *mu = &G2in[BYTES_PKHASH]; // contains secret data
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data
uint8_t *seedSE = &G2out[0]; // contains secret data
uint8_t *k = &G2out[CRYPTO_BYTES]; // contains secret data
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k
uint8_t *Fin_ct = &Fin[0];
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data

// pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
randombytes(mu, BYTES_MU);
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

// Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
shake_input_seedSE[0] = 0x96;
memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
Sp[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(Sp[i]);
}
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

// Generate Epp, and compute V = Sp*B + Epp
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp);

// Encode mu, and compute C = V + enc(mu) (mod q)
PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(C, (uint16_t *)mu);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(C, V, C);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

// Compute ss = F(ct||KK)
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
memcpy(Fin_k, k, CRYPTO_BYTES);
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

// Cleanup:
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(mu, BYTES_MU);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
return 0;
}


int PQCLEAN_FRODOKEM640SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
// FrodoKEM's key decapsulation
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data
const uint8_t *ct_c1 = &ct[0];
const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
const uint8_t *sk_s = &sk[0];
const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
uint16_t S[PARAMS_N * PARAMS_NBAR]; // contains secret data
const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
const uint8_t *pk_seedA = &sk_pk[0];
const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via muprime
uint8_t *pkh = &G2in[0];
uint8_t *muprime = &G2in[BYTES_PKHASH]; // contains secret data
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data
uint8_t *seedSEprime = &G2out[0]; // contains secret data
uint8_t *kprime = &G2out[CRYPTO_BYTES]; // contains secret data
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k
uint8_t *Fin_ct = &Fin[0];
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data
uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES]; // contains secret data

for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
}

// Compute W = C - Bp*S (mod q), and decode the randomness mu
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_bs(W, Bp, S);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sub(W, C, W);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_decode((uint16_t *)muprime, W);

// Generate (seedSE' || k') = G_2(pkh || mu')
memcpy(pkh, sk_pkh, BYTES_PKHASH);
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

// Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
shake_input_seedSEprime[0] = 0x96;
memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
Sp[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(Sp[i]);
}
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

// Generate Epp, and compute W = Sp*B + Epp
PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp);

// Encode mu, and compute CC = W + enc(mu') (mod q)
PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(CC, (uint16_t *)muprime);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(CC, W, CC);

// Prepare input to F
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

// Reducing BBp modulo q
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
}

// If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
// Needs to avoid branching on secret data as per:
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
int8_t selector = PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

// Cleanup:
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(muprime, BYTES_MU);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
return 0;
}

+ 0
- 108
src/kem/frodo/frodokem640shake/clean/matrix_shake.c View File

@@ -1,108 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: matrix arithmetic functions used by the KEM
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "fips202.h"

#include "api.h"
#include "common.h"
#include "params.h"

int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
// Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
// Inputs: s, e (N x N_BAR)
// Output: out = A*s + e (N x N_BAR)
int j, k;
uint16_t i;
int16_t a_row[4 * PARAMS_N];

for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
}

uint8_t seed_A_separated[2 + BYTES_SEED_A];
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated;
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A);
for (i = 0; i < PARAMS_N; i += 4) {
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 0);
shake128((unsigned char *)(a_row + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 1);
shake128((unsigned char *)(a_row + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 2);
shake128((unsigned char *)(a_row + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(i + 3);
shake128((unsigned char *)(a_row + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
for (k = 0; k < 4 * PARAMS_N; k++) {
a_row[k] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(a_row[k]);
}
for (k = 0; k < PARAMS_NBAR; k++) {
uint16_t sum[4] = {0};
for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication
uint16_t sp = s[k * PARAMS_N + j];
sum[0] += a_row[0 * PARAMS_N + j] * sp; // Go through four lines with same s
sum[1] += a_row[1 * PARAMS_N + j] * sp;
sum[2] += a_row[2 * PARAMS_N + j] * sp;
sum[3] += a_row[3 * PARAMS_N + j] * sp;
}
out[(i + 0)*PARAMS_NBAR + k] += sum[0];
out[(i + 2)*PARAMS_NBAR + k] += sum[2];
out[(i + 1)*PARAMS_NBAR + k] += sum[1];
out[(i + 3)*PARAMS_NBAR + k] += sum[3];
}
}
return 1;
}




int PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
// Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left.
// Inputs: s', e' (N_BAR x N)
// Output: out = s'*A + e' (N_BAR x N)
int i, j;
uint16_t kk;
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
}

int t = 0;
uint16_t a_cols[4 * PARAMS_N];

int k;
uint8_t seed_A_separated[2 + BYTES_SEED_A];
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated;
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A);
for (kk = 0; kk < PARAMS_N; kk += 4) {
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 0);
shake128((unsigned char *)(a_cols + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 1);
shake128((unsigned char *)(a_cols + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 2);
shake128((unsigned char *)(a_cols + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(kk + 3);
shake128((unsigned char *)(a_cols + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
for (i = 0; i < 4 * PARAMS_N; i++) {
a_cols[i] = PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(a_cols[i]);
}

for (i = 0; i < PARAMS_NBAR; i++) {
uint16_t sum[PARAMS_N] = {0};
for (j = 0; j < 4; j++) {
uint16_t sp = s[i * PARAMS_N + kk + j];
for (k = 0; k < PARAMS_N; k++) { // Matrix-vector multiplication
sum[k] += (uint16_t)(sp * (uint32_t)a_cols[(t + j) * PARAMS_N + k]);
}
}
for (k = 0; k < PARAMS_N; k++) {
out[i * PARAMS_N + k] += sum[k];
}
}
}
return 1;
}

+ 0
- 35
src/kem/frodo/frodokem640shake/clean/noise.c View File

@@ -1,35 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: noise sampling functions
*********************************************************************************************/

#include <stdint.h>

#include "api.h"
#include "common.h"
#include "params.h"

static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sample_n(uint16_t *s, size_t n) {
// Fills vector s with n samples from the noise distribution which requires 16 bits to sample.
// The distribution is specified by its CDF.
// Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output.
size_t i;
unsigned int j;

for (i = 0; i < n; ++i) {
uint16_t sample = 0;
uint16_t prnd = s[i] >> 1; // Drop the least significant bit
uint16_t sign = s[i] & 0x1; // Pick the least significant bit

// No need to compare with the last value.
for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) {
// Constant time comparison: 1 if CDF_TABLE[j] < s, 0 otherwise. Uses the fact that CDF_TABLE[j] and s fit in 15 bits.
sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15;
}
// Assuming that sign is either 0 or 1, flips sample iff sign = 1
s[i] = ((-sign) ^ sample) + sign;
}
}

+ 0
- 27
src/kem/frodo/frodokem640shake/clean/params.h View File

@@ -1,27 +0,0 @@
#ifndef PARAMS_H
#define PARAMS_H

#define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES
#define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES
#define CRYPTO_BYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_BYTES
#define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM640SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES

#define PARAMS_N 640
#define PARAMS_NBAR 8
#define PARAMS_LOGQ 15
#define PARAMS_Q (1 << PARAMS_LOGQ)
#define PARAMS_EXTRACTED_BITS 2
#define PARAMS_STRIPE_STEP 8
#define PARAMS_PARALLEL 4
#define BYTES_SEED_A 16
#define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
#define BYTES_PKHASH CRYPTO_BYTES

// Selecting SHAKE XOF function for the KEM and noise sampling
#define shake shake128

// CDF table
#define CDF_TABLE_DATA {4643, 13363, 20579, 25843, 29227, 31145, 32103, 32525, 32689, 32745, 32762, 32766, 32767}
#define CDF_TABLE_LEN 13

#endif

+ 0
- 264
src/kem/frodo/frodokem640shake/clean/util.c View File

@@ -1,264 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: additional functions for FrodoKEM
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "api.h"
#include "common.h"
#include "params.h"

static inline uint8_t min(uint8_t x, uint8_t y) {
if (x < y) {
return x;
}
return y;
}

uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_LE_TO_UINT16(uint16_t n) {
return (((uint8_t *) &n)[0] | (((uint8_t *) &n)[1] << 8));
}

uint16_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_UINT16_TO_LE(uint16_t n) {
uint16_t y;
uint8_t *z = (uint8_t *) &y;
z[0] = n & 0xFF;
z[1] = (n & 0xFF00) >> 8;
return y;
}

void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
// Multiply by s on the right
// Inputs: b (N_BAR x N), s (N x N_BAR)
// Output: out = b*s (N_BAR x N_BAR)
int i, j, k;

for (i = 0; i < PARAMS_NBAR; i++) {
for (j = 0; j < PARAMS_NBAR; j++) {
out[i * PARAMS_NBAR + j] = 0;
for (k = 0; k < PARAMS_N; k++) {
out[i * PARAMS_NBAR + j] += (uint16_t)(b[i * PARAMS_N + k] * (uint32_t)s[j * PARAMS_N + k]);
}
out[i * PARAMS_NBAR + j] = (uint32_t)(out[i * PARAMS_NBAR + j]) & ((1 << PARAMS_LOGQ) - 1);
}
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
// Multiply by s on the left
// Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR)
// Output: out = s*b + e (N_BAR x N_BAR)
int i, j, k;

for (k = 0; k < PARAMS_NBAR; k++) {
for (i = 0; i < PARAMS_NBAR; i++) {
out[k * PARAMS_NBAR + i] = e[k * PARAMS_NBAR + i];
for (j = 0; j < PARAMS_N; j++) {
out[k * PARAMS_NBAR + i] += (uint16_t)(s[k * PARAMS_N + j] * (uint32_t)b[j * PARAMS_NBAR + i]);
}
out[k * PARAMS_NBAR + i] = (uint32_t)(out[k * PARAMS_NBAR + i]) & ((1 << PARAMS_LOGQ) - 1);
}
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
// Add a and b
// Inputs: a, b (N_BAR x N_BAR)
// Output: c = a + b

for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
out[i] = (a[i] + b[i]) & ((1 << PARAMS_LOGQ) - 1);
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
// Subtract a and b
// Inputs: a, b (N_BAR x N_BAR)
// Output: c = a - b

for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
out[i] = (a[i] - b[i]) & ((1 << PARAMS_LOGQ) - 1);
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in) {
// Encoding
unsigned int i, j, npieces_word = 8;
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
uint64_t temp, mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
uint16_t *pos = out;

for (i = 0; i < nwords; i++) {
temp = 0;
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
temp |= ((uint64_t)((uint8_t *)in)[i * PARAMS_EXTRACTED_BITS + j]) << (8 * j);
}
for (j = 0; j < npieces_word; j++) {
*pos = (uint16_t)((temp & mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
temp >>= PARAMS_EXTRACTED_BITS;
pos++;
}
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in) {
// Decoding
unsigned int i, j, index = 0, npieces_word = 8;
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
uint16_t temp, maskex = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1, maskq = ((uint16_t)1 << PARAMS_LOGQ) - 1;
uint8_t *pos = (uint8_t *)out;
uint64_t templong;

for (i = 0; i < nwords; i++) {
templong = 0;
for (j = 0; j < npieces_word; j++) { // temp = floor(in*2^{-11}+0.5)
temp = ((in[index] & maskq) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
templong |= ((uint64_t)(temp & maskex)) << (PARAMS_EXTRACTED_BITS * j);
index++;
}
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
pos[i * PARAMS_EXTRACTED_BITS + j] = (templong >> (8 * j)) & 0xFF;
}
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
// Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
// If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
memset(out, 0, outlen);

size_t i = 0; // whole bytes already filled in
size_t j = 0; // whole uint16_t already copied
uint16_t w = 0; // the leftover, not yet copied
uint8_t bits = 0; // the number of lsb in w

while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
/*
in: | | |********|********|
^
j
w : | ****|
^
bits
out:|**|**|**|**|**|**|**|**|* |
^^
ib
*/
uint8_t b = 0; // bits in out[i] already filled in
while (b < 8) {
int nbits = min(8 - b, bits);
uint16_t mask = (1 << nbits) - 1;
uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask); // the bits to copy from w to out
out[i] = out[i] + (t << (8 - b - nbits));
b += (uint8_t) nbits;
bits -= (uint8_t) nbits;
w &= ~(mask << bits); // not strictly necessary; mostly for debugging

if (bits == 0) {
if (j < inlen) {
w = in[j];
bits = lsb;
j++;
} else {
break; // the input vector is exhausted
}
}
}
if (b == 8) { // out[i] is filled in
i++;
}
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
// Unpack the input char vector into a uint16_t output vector, copying lsb bits
// for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
memset(out, 0, outlen * sizeof(uint16_t));

size_t i = 0; // whole uint16_t already filled in
size_t j = 0; // whole bytes already copied
uint8_t w = 0; // the leftover, not yet copied
uint8_t bits = 0; // the number of lsb bits of w

while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
/*
in: | | | | | | |**|**|...
^
j
w : | *|
^
bits
out:| *****| *****| *** | |...
^ ^
i b
*/
uint8_t b = 0; // bits in out[i] already filled in
while (b < lsb) {
int nbits = min(lsb - b, bits);
uint16_t mask = (1 << nbits) - 1;
uint8_t t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out
out[i] = out[i] + (t << (lsb - b - nbits));
b += (uint8_t) nbits;
bits -= (uint8_t) nbits;
w &= ~(mask << bits); // not strictly necessary; mostly for debugging

if (bits == 0) {
if (j < inlen) {
w = in[j];
bits = 8;
j++;
} else {
break; // the input vector is exhausted
}
}
}
if (b == lsb) { // out[i] is filled in
i++;
}
}
}


int8_t PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
// Compare two arrays in constant time.
// Returns 0 if the byte arrays are equal, -1 otherwise.
uint16_t r = 0;

for (size_t i = 0; i < len; i++) {
r |= a[i] ^ b[i];
}

r = (-(int16_t)(r >> 1) | -(int16_t)(r & 1)) >> (8 * sizeof(uint16_t) -1);
return (int8_t)r;
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
// Select one of the two input arrays to be moved to r
// If (selector == 0) then load r with a, else if (selector == -1) load r with b

for (size_t i = 0; i < len; i++) {
r[i] = (~selector & a[i]) | (selector & b[i]);
}
}


void PQCLEAN_FRODOKEM640SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n) {
// Clear 8-bit bytes from memory. "n" indicates the number of bytes to be zeroed.
// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing.
volatile uint8_t *v = mem;

for (size_t i = 0; i < n; i++) {
v[i] = 0;
}
}

+ 0
- 10
src/kem/frodo/frodokem976shake/clean/CMakeLists.txt View File

@@ -1,10 +0,0 @@
set(
SRC_CLEAN_FRODOKEM976SHAKE
kem.c
matrix_shake.c
noise.c
util.c
)

define_kem_alg(frodo976shake_clean
PQCLEAN_FRODOKEM976SHAKE_OPT "${SRC_CLEAN_FRODOKEM976SHAKE}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 20
src/kem/frodo/frodokem976shake/clean/api.h View File

@@ -1,20 +0,0 @@
#ifndef PQCLEAN_FRODOKEM976SHAKE_CLEAN_API_H
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES 31296 // sizeof(s) + CRYPTO_PUBLICKEYBYTES + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES 15632 // sizeof(seed_A) + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_BYTES 24
#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES 15744 // (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + (PARAMS_LOGQ*PARAMS_NBAR*PARAMS_NBAR)/8

#define PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_ALGNAME "FrodoKEM-976-SHAKE"

int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);

int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);

#endif

+ 0
- 21
src/kem/frodo/frodokem976shake/clean/common.h View File

@@ -1,21 +0,0 @@
#ifndef COMMON_H
#define COMMON_H

int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(uint16_t *s, size_t n);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb);
int8_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector);
void PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n);
uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(uint16_t n);
uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(uint16_t n);

#endif

+ 0
- 237
src/kem/frodo/frodokem976shake/clean/kem.c View File

@@ -1,237 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: Key Encapsulation Mechanism (KEM) based on Frodo
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "fips202.h"
#include "randombytes.h"

#include "api.h"
#include "common.h"
#include "params.h"

int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
// FrodoKEM's key generation
// Outputs: public key pk ( BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 bytes)
// secret key sk (CRYPTO_BYTES + BYTES_SEED_A + (PARAMS_LOGQ*PARAMS_N*PARAMS_NBAR)/8 + 2*PARAMS_N*PARAMS_NBAR + BYTES_PKHASH bytes)
uint8_t *pk_seedA = &pk[0];
uint8_t *pk_b = &pk[BYTES_SEED_A];
uint8_t *sk_s = &sk[0];
uint8_t *sk_pk = &sk[CRYPTO_BYTES];
uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t S[2 * PARAMS_N * PARAMS_NBAR] = {0}; // contains secret data
uint16_t *E = &S[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint8_t randomness[2 * CRYPTO_BYTES + BYTES_SEED_A]; // contains secret data via randomness_s and randomness_seedSE
uint8_t *randomness_s = &randomness[0]; // contains secret data
uint8_t *randomness_seedSE = &randomness[CRYPTO_BYTES]; // contains secret data
uint8_t *randomness_z = &randomness[2 * CRYPTO_BYTES];
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data

// Generate the secret value s, the seed for S and E, and the seed for the seed for A. Add seed_A to the public key
randombytes(randomness, CRYPTO_BYTES + CRYPTO_BYTES + BYTES_SEED_A);
shake(pk_seedA, BYTES_SEED_A, randomness_z, BYTES_SEED_A);

// Generate S and E, and compute B = A*S + E. Generate A on-the-fly
shake_input_seedSE[0] = 0x5F;
memcpy(&shake_input_seedSE[1], randomness_seedSE, CRYPTO_BYTES);
shake((uint8_t *)S, 2 * PARAMS_N * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < 2 * PARAMS_N * PARAMS_NBAR; i++) {
S[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(S[i]);
}
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(S, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(E, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_as_plus_e(B, S, E, pk);

// Encode the second part of the public key
PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, B, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

// Add s, pk and S to the secret key
memcpy(sk_s, randomness_s, CRYPTO_BYTES);
memcpy(sk_pk, pk, CRYPTO_PUBLICKEYBYTES);
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
S[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(S[i]);
}
memcpy(sk_S, S, 2 * PARAMS_N * PARAMS_NBAR);

// Add H(pk) to the secret key
shake(sk_pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);

// Cleanup:
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)E, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(randomness, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
return 0;
}


int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
// FrodoKEM's key encapsulation
const uint8_t *pk_seedA = &pk[0];
const uint8_t *pk_b = &pk[BYTES_SEED_A];
uint8_t *ct_c1 = &ct[0];
uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t V[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via mu
uint8_t *pkh = &G2in[0];
uint8_t *mu = &G2in[BYTES_PKHASH]; // contains secret data
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data
uint8_t *seedSE = &G2out[0]; // contains secret data
uint8_t *k = &G2out[CRYPTO_BYTES]; // contains secret data
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k
uint8_t *Fin_ct = &Fin[0];
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data
uint8_t shake_input_seedSE[1 + CRYPTO_BYTES]; // contains secret data

// pkh <- G_1(pk), generate random mu, compute (seedSE || k) = G_2(pkh || mu)
shake(pkh, BYTES_PKHASH, pk, CRYPTO_PUBLICKEYBYTES);
randombytes(mu, BYTES_MU);
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

// Generate Sp and Ep, and compute Bp = Sp*A + Ep. Generate A on-the-fly
shake_input_seedSE[0] = 0x96;
memcpy(&shake_input_seedSE[1], seedSE, CRYPTO_BYTES);
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSE, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
Sp[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(Sp[i]);
}
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(Bp, Sp, Ep, pk_seedA);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, Bp, PARAMS_N * PARAMS_NBAR, PARAMS_LOGQ);

// Generate Epp, and compute V = Sp*B + Epp
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(V, B, Sp, Epp);

// Encode mu, and compute C = V + enc(mu) (mod q)
PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(C, (uint16_t *)mu);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(C, V, C);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, C, PARAMS_NBAR * PARAMS_NBAR, PARAMS_LOGQ);

// Compute ss = F(ct||KK)
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);
memcpy(Fin_k, k, CRYPTO_BYTES);
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

// Cleanup:
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)V, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(mu, BYTES_MU);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(shake_input_seedSE, 1 + CRYPTO_BYTES);
return 0;
}


int PQCLEAN_FRODOKEM976SHAKE_CLEAN_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
// FrodoKEM's key decapsulation
uint16_t B[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Bp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t W[PARAMS_NBAR * PARAMS_NBAR] = {0}; // contains secret data
uint16_t C[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t CC[PARAMS_NBAR * PARAMS_NBAR] = {0};
uint16_t BBp[PARAMS_N * PARAMS_NBAR] = {0};
uint16_t Sp[(2 * PARAMS_N + PARAMS_NBAR)*PARAMS_NBAR] = {0}; // contains secret data
uint16_t *Ep = &Sp[PARAMS_N * PARAMS_NBAR]; // contains secret data
uint16_t *Epp = &Sp[2 * PARAMS_N * PARAMS_NBAR]; // contains secret data
const uint8_t *ct_c1 = &ct[0];
const uint8_t *ct_c2 = &ct[(PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8];
const uint8_t *sk_s = &sk[0];
const uint8_t *sk_pk = &sk[CRYPTO_BYTES];
const uint8_t *sk_S = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES];
uint16_t S[PARAMS_N * PARAMS_NBAR]; // contains secret data
const uint8_t *sk_pkh = &sk[CRYPTO_BYTES + CRYPTO_PUBLICKEYBYTES + 2 * PARAMS_N * PARAMS_NBAR];
const uint8_t *pk_seedA = &sk_pk[0];
const uint8_t *pk_b = &sk_pk[BYTES_SEED_A];
uint8_t G2in[BYTES_PKHASH + BYTES_MU]; // contains secret data via muprime
uint8_t *pkh = &G2in[0];
uint8_t *muprime = &G2in[BYTES_PKHASH]; // contains secret data
uint8_t G2out[2 * CRYPTO_BYTES]; // contains secret data
uint8_t *seedSEprime = &G2out[0]; // contains secret data
uint8_t *kprime = &G2out[CRYPTO_BYTES]; // contains secret data
uint8_t Fin[CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES]; // contains secret data via Fin_k
uint8_t *Fin_ct = &Fin[0];
uint8_t *Fin_k = &Fin[CRYPTO_CIPHERTEXTBYTES]; // contains secret data
uint8_t shake_input_seedSEprime[1 + CRYPTO_BYTES]; // contains secret data

for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
S[i] = sk_S[2 * i] | (sk_S[2 * i + 1] << 8);
}

// Compute W = C - Bp*S (mod q), and decode the randomness mu
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(Bp, PARAMS_N * PARAMS_NBAR, ct_c1, (PARAMS_LOGQ * PARAMS_N * PARAMS_NBAR) / 8, PARAMS_LOGQ);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(C, PARAMS_NBAR * PARAMS_NBAR, ct_c2, (PARAMS_LOGQ * PARAMS_NBAR * PARAMS_NBAR) / 8, PARAMS_LOGQ);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_bs(W, Bp, S);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sub(W, C, W);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_decode((uint16_t *)muprime, W);

// Generate (seedSE' || k') = G_2(pkh || mu')
memcpy(pkh, sk_pkh, BYTES_PKHASH);
shake(G2out, CRYPTO_BYTES + CRYPTO_BYTES, G2in, BYTES_PKHASH + BYTES_MU);

// Generate Sp and Ep, and compute BBp = Sp*A + Ep. Generate A on-the-fly
shake_input_seedSEprime[0] = 0x96;
memcpy(&shake_input_seedSEprime[1], seedSEprime, CRYPTO_BYTES);
shake((uint8_t *)Sp, (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR * sizeof(uint16_t), shake_input_seedSEprime, 1 + CRYPTO_BYTES);
for (size_t i = 0; i < (2 * PARAMS_N + PARAMS_NBAR) * PARAMS_NBAR; i++) {
Sp[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(Sp[i]);
}
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Sp, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Ep, PARAMS_N * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(BBp, Sp, Ep, pk_seedA);

// Generate Epp, and compute W = Sp*B + Epp
PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(Epp, PARAMS_NBAR * PARAMS_NBAR);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(B, PARAMS_N * PARAMS_NBAR, pk_b, CRYPTO_PUBLICKEYBYTES - BYTES_SEED_A, PARAMS_LOGQ);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(W, B, Sp, Epp);

// Encode mu, and compute CC = W + enc(mu') (mod q)
PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(CC, (uint16_t *)muprime);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(CC, W, CC);

// Prepare input to F
memcpy(Fin_ct, ct, CRYPTO_CIPHERTEXTBYTES);

// Reducing BBp modulo q
for (size_t i = 0; i < PARAMS_N * PARAMS_NBAR; i++) {
BBp[i] = BBp[i] & ((1 << PARAMS_LOGQ) - 1);
}

// If (Bp == BBp & C == CC) then ss = F(ct || k'), else ss = F(ct || s)
// Needs to avoid branching on secret data as per:
// Qian Guo, Thomas Johansson, Alexander Nilsson. A key-recovery timing attack on post-quantum
// primitives using the Fujisaki-Okamoto transformation and its application on FrodoKEM. In CRYPTO 2020.
int8_t selector = PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(Bp, BBp, PARAMS_N * PARAMS_NBAR) | PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(C, CC, PARAMS_NBAR * PARAMS_NBAR);
// If (selector == 0) then load k' to do ss = F(ct || k'), else if (selector == -1) load s to do ss = F(ct || s)
PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_select((uint8_t *)Fin_k, (uint8_t *)kprime, (uint8_t *)sk_s, CRYPTO_BYTES, selector);
shake(ss, CRYPTO_BYTES, Fin, CRYPTO_CIPHERTEXTBYTES + CRYPTO_BYTES);

// Cleanup:
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)W, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Sp, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)S, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Ep, PARAMS_N * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes((uint8_t *)Epp, PARAMS_NBAR * PARAMS_NBAR * sizeof(uint16_t));
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(muprime, BYTES_MU);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(G2out, 2 * CRYPTO_BYTES);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(Fin_k, CRYPTO_BYTES);
PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(shake_input_seedSEprime, 1 + CRYPTO_BYTES);
return 0;
}

+ 0
- 108
src/kem/frodo/frodokem976shake/clean/matrix_shake.c View File

@@ -1,108 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: matrix arithmetic functions used by the KEM
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "fips202.h"

#include "api.h"
#include "common.h"
#include "params.h"

int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
// Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
// Inputs: s, e (N x N_BAR)
// Output: out = A*s + e (N x N_BAR)
int j, k;
uint16_t i;
int16_t a_row[4 * PARAMS_N];

for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
}

uint8_t seed_A_separated[2 + BYTES_SEED_A];
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated;
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A);
for (i = 0; i < PARAMS_N; i += 4) {
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 0);
shake128((unsigned char *)(a_row + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 1);
shake128((unsigned char *)(a_row + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 2);
shake128((unsigned char *)(a_row + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(i + 3);
shake128((unsigned char *)(a_row + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
for (k = 0; k < 4 * PARAMS_N; k++) {
a_row[k] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(a_row[k]);
}
for (k = 0; k < PARAMS_NBAR; k++) {
uint16_t sum[4] = {0};
for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication
uint16_t sp = s[k * PARAMS_N + j];
sum[0] += a_row[0 * PARAMS_N + j] * sp; // Go through four lines with same s
sum[1] += a_row[1 * PARAMS_N + j] * sp;
sum[2] += a_row[2 * PARAMS_N + j] * sp;
sum[3] += a_row[3 * PARAMS_N + j] * sp;
}
out[(i + 0)*PARAMS_NBAR + k] += sum[0];
out[(i + 2)*PARAMS_NBAR + k] += sum[2];
out[(i + 1)*PARAMS_NBAR + k] += sum[1];
out[(i + 3)*PARAMS_NBAR + k] += sum[3];
}
}
return 1;
}




int PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
// Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left.
// Inputs: s', e' (N_BAR x N)
// Output: out = s'*A + e' (N_BAR x N)
int i, j;
uint16_t kk;
for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
*((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
}

int t = 0;
uint16_t a_cols[4 * PARAMS_N];

int k;
uint8_t seed_A_separated[2 + BYTES_SEED_A];
uint16_t *seed_A_origin = (uint16_t *)&seed_A_separated;
memcpy(&seed_A_separated[2], seed_A, BYTES_SEED_A);
for (kk = 0; kk < PARAMS_N; kk += 4) {
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 0);
shake128((unsigned char *)(a_cols + 0 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 1);
shake128((unsigned char *)(a_cols + 1 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 2);
shake128((unsigned char *)(a_cols + 2 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
seed_A_origin[0] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(kk + 3);
shake128((unsigned char *)(a_cols + 3 * PARAMS_N), (unsigned long long)(2 * PARAMS_N), seed_A_separated, 2 + BYTES_SEED_A);
for (i = 0; i < 4 * PARAMS_N; i++) {
a_cols[i] = PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(a_cols[i]);
}

for (i = 0; i < PARAMS_NBAR; i++) {
uint16_t sum[PARAMS_N] = {0};
for (j = 0; j < 4; j++) {
uint16_t sp = s[i * PARAMS_N + kk + j];
for (k = 0; k < PARAMS_N; k++) { // Matrix-vector multiplication
sum[k] += (uint16_t)(sp * (uint32_t)a_cols[(t + j) * PARAMS_N + k]);
}
}
for (k = 0; k < PARAMS_N; k++) {
out[i * PARAMS_N + k] += sum[k];
}
}
}
return 1;
}

+ 0
- 35
src/kem/frodo/frodokem976shake/clean/noise.c View File

@@ -1,35 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: noise sampling functions
*********************************************************************************************/

#include <stdint.h>

#include "api.h"
#include "common.h"
#include "params.h"

static const uint16_t CDF_TABLE[CDF_TABLE_LEN] = CDF_TABLE_DATA;

void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sample_n(uint16_t *s, size_t n) {
// Fills vector s with n samples from the noise distribution which requires 16 bits to sample.
// The distribution is specified by its CDF.
// Input: pseudo-random values (2*n bytes) passed in s. The input is overwritten by the output.
size_t i;
unsigned int j;

for (i = 0; i < n; ++i) {
uint16_t sample = 0;
uint16_t prnd = s[i] >> 1; // Drop the least significant bit
uint16_t sign = s[i] & 0x1; // Pick the least significant bit

// No need to compare with the last value.
for (j = 0; j < (unsigned int)(CDF_TABLE_LEN - 1); j++) {
// Constant time comparison: 1 if CDF_TABLE[j] < s, 0 otherwise. Uses the fact that CDF_TABLE[j] and s fit in 15 bits.
sample += (uint16_t)(CDF_TABLE[j] - prnd) >> 15;
}
// Assuming that sign is either 0 or 1, flips sample iff sign = 1
s[i] = ((-sign) ^ sample) + sign;
}
}

+ 0
- 27
src/kem/frodo/frodokem976shake/clean/params.h View File

@@ -1,27 +0,0 @@
#ifndef PARAMS_H
#define PARAMS_H

#define CRYPTO_SECRETKEYBYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_SECRETKEYBYTES
#define CRYPTO_PUBLICKEYBYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_PUBLICKEYBYTES
#define CRYPTO_BYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_BYTES
#define CRYPTO_CIPHERTEXTBYTES PQCLEAN_FRODOKEM976SHAKE_CLEAN_CRYPTO_CIPHERTEXTBYTES

#define PARAMS_N 976
#define PARAMS_NBAR 8
#define PARAMS_LOGQ 16
#define PARAMS_Q (1 << PARAMS_LOGQ)
#define PARAMS_EXTRACTED_BITS 3
#define PARAMS_STRIPE_STEP 8
#define PARAMS_PARALLEL 4
#define BYTES_SEED_A 16
#define BYTES_MU ((PARAMS_EXTRACTED_BITS * PARAMS_NBAR * PARAMS_NBAR) / 8)
#define BYTES_PKHASH CRYPTO_BYTES

// Selecting SHAKE XOF function for the KEM and noise sampling
#define shake shake256

// CDF table
#define CDF_TABLE_DATA {5638, 15915, 23689, 28571, 31116, 32217, 32613, 32731, 32760, 32766, 32767}
#define CDF_TABLE_LEN 11

#endif

+ 0
- 264
src/kem/frodo/frodokem976shake/clean/util.c View File

@@ -1,264 +0,0 @@
/********************************************************************************************
* FrodoKEM: Learning with Errors Key Encapsulation
*
* Abstract: additional functions for FrodoKEM
*********************************************************************************************/

#include <stdint.h>
#include <string.h>

#include "api.h"
#include "common.h"
#include "params.h"

static inline uint8_t min(uint8_t x, uint8_t y) {
if (x < y) {
return x;
}
return y;
}

uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_LE_TO_UINT16(uint16_t n) {
return (((uint8_t *) &n)[0] | (((uint8_t *) &n)[1] << 8));
}

uint16_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_UINT16_TO_LE(uint16_t n) {
uint16_t y;
uint8_t *z = (uint8_t *) &y;
z[0] = n & 0xFF;
z[1] = (n & 0xFF00) >> 8;
return y;
}

void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_bs(uint16_t *out, const uint16_t *b, const uint16_t *s) {
// Multiply by s on the right
// Inputs: b (N_BAR x N), s (N x N_BAR)
// Output: out = b*s (N_BAR x N_BAR)
int i, j, k;

for (i = 0; i < PARAMS_NBAR; i++) {
for (j = 0; j < PARAMS_NBAR; j++) {
out[i * PARAMS_NBAR + j] = 0;
for (k = 0; k < PARAMS_N; k++) {
out[i * PARAMS_NBAR + j] += (uint16_t)(b[i * PARAMS_N + k] * (uint32_t)s[j * PARAMS_N + k]);
}
out[i * PARAMS_NBAR + j] = (uint32_t)(out[i * PARAMS_NBAR + j]) & ((1 << PARAMS_LOGQ) - 1);
}
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_mul_add_sb_plus_e(uint16_t *out, const uint16_t *b, const uint16_t *s, const uint16_t *e) {
// Multiply by s on the left
// Inputs: b (N x N_BAR), s (N_BAR x N), e (N_BAR x N_BAR)
// Output: out = s*b + e (N_BAR x N_BAR)
int i, j, k;

for (k = 0; k < PARAMS_NBAR; k++) {
for (i = 0; i < PARAMS_NBAR; i++) {
out[k * PARAMS_NBAR + i] = e[k * PARAMS_NBAR + i];
for (j = 0; j < PARAMS_N; j++) {
out[k * PARAMS_NBAR + i] += (uint16_t)(s[k * PARAMS_N + j] * (uint32_t)b[j * PARAMS_NBAR + i]);
}
out[k * PARAMS_NBAR + i] = (uint32_t)(out[k * PARAMS_NBAR + i]) & ((1 << PARAMS_LOGQ) - 1);
}
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_add(uint16_t *out, const uint16_t *a, const uint16_t *b) {
// Add a and b
// Inputs: a, b (N_BAR x N_BAR)
// Output: c = a + b

for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
out[i] = (a[i] + b[i]) & ((1 << PARAMS_LOGQ) - 1);
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_sub(uint16_t *out, const uint16_t *a, const uint16_t *b) {
// Subtract a and b
// Inputs: a, b (N_BAR x N_BAR)
// Output: c = a - b

for (size_t i = 0; i < (PARAMS_NBAR * PARAMS_NBAR); i++) {
out[i] = (a[i] - b[i]) & ((1 << PARAMS_LOGQ) - 1);
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_encode(uint16_t *out, const uint16_t *in) {
// Encoding
unsigned int i, j, npieces_word = 8;
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
uint64_t temp, mask = ((uint64_t)1 << PARAMS_EXTRACTED_BITS) - 1;
uint16_t *pos = out;

for (i = 0; i < nwords; i++) {
temp = 0;
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
temp |= ((uint64_t)((uint8_t *)in)[i * PARAMS_EXTRACTED_BITS + j]) << (8 * j);
}
for (j = 0; j < npieces_word; j++) {
*pos = (uint16_t)((temp & mask) << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS));
temp >>= PARAMS_EXTRACTED_BITS;
pos++;
}
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_key_decode(uint16_t *out, const uint16_t *in) {
// Decoding
unsigned int i, j, index = 0, npieces_word = 8;
unsigned int nwords = (PARAMS_NBAR * PARAMS_NBAR) / 8;
uint16_t temp, maskex = ((uint16_t)1 << PARAMS_EXTRACTED_BITS) - 1, maskq = ((uint16_t)1 << PARAMS_LOGQ) - 1;
uint8_t *pos = (uint8_t *)out;
uint64_t templong;

for (i = 0; i < nwords; i++) {
templong = 0;
for (j = 0; j < npieces_word; j++) { // temp = floor(in*2^{-11}+0.5)
temp = ((in[index] & maskq) + (1 << (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS - 1))) >> (PARAMS_LOGQ - PARAMS_EXTRACTED_BITS);
templong |= ((uint64_t)(temp & maskex)) << (PARAMS_EXTRACTED_BITS * j);
index++;
}
for (j = 0; j < PARAMS_EXTRACTED_BITS; j++) {
pos[i * PARAMS_EXTRACTED_BITS + j] = (templong >> (8 * j)) & 0xFF;
}
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_pack(uint8_t *out, size_t outlen, const uint16_t *in, size_t inlen, uint8_t lsb) {
// Pack the input uint16 vector into a char output vector, copying lsb bits from each input element.
// If inlen * lsb / 8 > outlen, only outlen * 8 bits are copied.
memset(out, 0, outlen);

size_t i = 0; // whole bytes already filled in
size_t j = 0; // whole uint16_t already copied
uint16_t w = 0; // the leftover, not yet copied
uint8_t bits = 0; // the number of lsb in w

while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
/*
in: | | |********|********|
^
j
w : | ****|
^
bits
out:|**|**|**|**|**|**|**|**|* |
^^
ib
*/
uint8_t b = 0; // bits in out[i] already filled in
while (b < 8) {
int nbits = min(8 - b, bits);
uint16_t mask = (1 << nbits) - 1;
uint8_t t = (uint8_t) ((w >> (bits - nbits)) & mask); // the bits to copy from w to out
out[i] = out[i] + (t << (8 - b - nbits));
b += (uint8_t) nbits;
bits -= (uint8_t) nbits;
w &= ~(mask << bits); // not strictly necessary; mostly for debugging

if (bits == 0) {
if (j < inlen) {
w = in[j];
bits = lsb;
j++;
} else {
break; // the input vector is exhausted
}
}
}
if (b == 8) { // out[i] is filled in
i++;
}
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_unpack(uint16_t *out, size_t outlen, const uint8_t *in, size_t inlen, uint8_t lsb) {
// Unpack the input char vector into a uint16_t output vector, copying lsb bits
// for each output element from input. outlen must be at least ceil(inlen * 8 / lsb).
memset(out, 0, outlen * sizeof(uint16_t));

size_t i = 0; // whole uint16_t already filled in
size_t j = 0; // whole bytes already copied
uint8_t w = 0; // the leftover, not yet copied
uint8_t bits = 0; // the number of lsb bits of w

while (i < outlen && (j < inlen || ((j == inlen) && (bits > 0)))) {
/*
in: | | | | | | |**|**|...
^
j
w : | *|
^
bits
out:| *****| *****| *** | |...
^ ^
i b
*/
uint8_t b = 0; // bits in out[i] already filled in
while (b < lsb) {
int nbits = min(lsb - b, bits);
uint16_t mask = (1 << nbits) - 1;
uint8_t t = (w >> (bits - nbits)) & mask; // the bits to copy from w to out
out[i] = out[i] + (t << (lsb - b - nbits));
b += (uint8_t) nbits;
bits -= (uint8_t) nbits;
w &= ~(mask << bits); // not strictly necessary; mostly for debugging

if (bits == 0) {
if (j < inlen) {
w = in[j];
bits = 8;
j++;
} else {
break; // the input vector is exhausted
}
}
}
if (b == lsb) { // out[i] is filled in
i++;
}
}
}


int8_t PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_verify(const uint16_t *a, const uint16_t *b, size_t len) {
// Compare two arrays in constant time.
// Returns 0 if the byte arrays are equal, -1 otherwise.
uint16_t r = 0;

for (size_t i = 0; i < len; i++) {
r |= a[i] ^ b[i];
}

r = (-(int16_t)(r >> 1) | -(int16_t)(r & 1)) >> (8 * sizeof(uint16_t) -1);
return (int8_t)r;
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_ct_select(uint8_t *r, const uint8_t *a, const uint8_t *b, size_t len, int8_t selector) {
// Select one of the two input arrays to be moved to r
// If (selector == 0) then load r with a, else if (selector == -1) load r with b

for (size_t i = 0; i < len; i++) {
r[i] = (~selector & a[i]) | (selector & b[i]);
}
}


void PQCLEAN_FRODOKEM976SHAKE_CLEAN_clear_bytes(uint8_t *mem, size_t n) {
// Clear 8-bit bytes from memory. "n" indicates the number of bytes to be zeroed.
// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing.
volatile uint8_t *v = mem;

for (size_t i = 0; i < n; i++) {
v[i] = 0;
}
}

+ 0
- 32
src/kem/ntru/ntruhps2048509/avx2/CMakeLists.txt View File

@@ -1,32 +0,0 @@
set(
SRC_AVX2_NTRUHPS2048509
cmov.c
crypto_sort_int32.c
kem.c
owcpa.c
pack3.c
packq.c
poly.c
poly_lift.c
poly_mod_3_Phi_n.s
poly_mod_q_Phi_n.s
poly_r2_inv.c
poly_r2_mul.s
poly_rq_mul.s
poly_rq_to_s3.s
poly_s3_inv.c
sample.c
sample_iid.c
square_126_509_shufbytes.s
square_1_509_patience.s
square_15_509_shufbytes.s
square_252_509_shufbytes.s
square_30_509_shufbytes.s
square_3_509_patience.s
square_63_509_shufbytes.s
square_6_509_patience.s
vec32_sample_iid.s
)

define_kem_alg(ntruhps2048509_avx2
PQCLEAN_NTRUHPS2048509_AVX2 "${SRC_AVX2_NTRUHPS2048509}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 19
src/kem/ntru/ntruhps2048509/avx2/api.h View File

@@ -1,19 +0,0 @@
#ifndef PQCLEAN_NTRUHPS2048509_AVX2_API_H
#define PQCLEAN_NTRUHPS2048509_AVX2_API_H

#include <stdint.h>

#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_SECRETKEYBYTES 935
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_PUBLICKEYBYTES 699
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_CIPHERTEXTBYTES 699
#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_BYTES 32

#define PQCLEAN_NTRUHPS2048509_AVX2_CRYPTO_ALGNAME "ntruhps2048509"

int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk);

int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk);

#endif

+ 0
- 11
src/kem/ntru/ntruhps2048509/avx2/cmov.c View File

@@ -1,11 +0,0 @@
#include "cmov.h"

/* b = 1 means mov, b = 0 means don't mov*/
void PQCLEAN_NTRUHPS2048509_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) {
size_t i;

b = (~b + 1);
for (i = 0; i < len; i++) {
r[i] ^= b & (x[i] ^ r[i]);
}
}

+ 0
- 10
src/kem/ntru/ntruhps2048509/avx2/cmov.h View File

@@ -1,10 +0,0 @@
#ifndef VERIFY_H
#define VERIFY_H

#include "params.h"

#include <stddef.h>

void PQCLEAN_NTRUHPS2048509_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b);

#endif

+ 0
- 1218
src/kem/ntru/ntruhps2048509/avx2/crypto_sort_int32.c
File diff suppressed because it is too large
View File


+ 0
- 11
src/kem/ntru/ntruhps2048509/avx2/crypto_sort_int32.h View File

@@ -1,11 +0,0 @@
#ifndef CRYPTO_SORT
#define CRYPTO_SORT

#include "params.h"

#include <stddef.h>
#include <stdint.h>

void PQCLEAN_NTRUHPS2048509_AVX2_crypto_sort_int32(int32_t *x, size_t n);

#endif

+ 0
- 63
src/kem/ntru/ntruhps2048509/avx2/kem.c View File

@@ -1,63 +0,0 @@
#include "api.h"
#include "cmov.h"
#include "fips202.h"
#include "owcpa.h"
#include "params.h"
#include "randombytes.h"
#include "sample.h"

// API FUNCTIONS
int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
uint8_t seed[NTRU_SAMPLE_FG_BYTES];

randombytes(seed, NTRU_SAMPLE_FG_BYTES);
PQCLEAN_NTRUHPS2048509_AVX2_owcpa_keypair(pk, sk, seed);

randombytes(sk + NTRU_OWCPA_SECRETKEYBYTES, NTRU_PRFKEYBYTES);

return 0;
}

int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) {
poly r, m;
uint8_t rm[NTRU_OWCPA_MSGBYTES];
uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES];

randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES);

PQCLEAN_NTRUHPS2048509_AVX2_sample_rm(&r, &m, rm_seed);

PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm, &r);
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m);
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES);

PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(&r);
PQCLEAN_NTRUHPS2048509_AVX2_owcpa_enc(c, &r, &m, pk);

return 0;
}

int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk) {
int i, fail;
uint8_t rm[NTRU_OWCPA_MSGBYTES];
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];

fail = PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(rm, c, sk);
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
/* See comment in PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec for details. */

sha3_256(k, rm, NTRU_OWCPA_MSGBYTES);

/* shake(secret PRF key || input ciphertext) */
for (i = 0; i < NTRU_PRFKEYBYTES; i++) {
buf[i] = sk[i + NTRU_OWCPA_SECRETKEYBYTES];
}
for (i = 0; i < NTRU_CIPHERTEXTBYTES; i++) {
buf[NTRU_PRFKEYBYTES + i] = c[i];
}
sha3_256(rm, buf, NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES);

PQCLEAN_NTRUHPS2048509_AVX2_cmov(k, rm, NTRU_SHAREDKEYBYTES, (unsigned char) fail);

return 0;
}

+ 0
- 183
src/kem/ntru/ntruhps2048509/avx2/owcpa.c View File

@@ -1,183 +0,0 @@
#include "owcpa.h"
#include "poly.h"
#include "sample.h"

static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
/* Check that any unused bits of the final byte are zero. */

uint16_t t = 0;

t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));

/* We have 0 <= t < 256 */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 15));
}

static int owcpa_check_r(const poly *r) {
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */

int i;
uint32_t t = 0;
uint16_t c;
for (i = 0; i < NTRU_N - 1; i++) {
c = r->coeffs[i];
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
}
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */

/* We have 0 <= t < 2^16. */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 31));
}

static int owcpa_check_m(const poly *m) {
/* Check that m is in message space, i.e. */
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
/* Note: We may assume that m has coefficients in {0,1,2}. */

int i;
uint32_t t = 0;
uint16_t ps = 0;
uint16_t ms = 0;
for (i = 0; i < NTRU_N; i++) {
ps += m->coeffs[i] & 1;
ms += m->coeffs[i] & 2;
}
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */

/* We have 0 <= t < 2^16. */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 31));
}

void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_keypair(unsigned char *pk,
unsigned char *sk,
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) {
int i;

poly x1, x2, x3, x4, x5;

poly *f = &x1, *g = &x2, *invf_mod3 = &x3;
poly *gf = &x3, *invgf = &x4, *tmp = &x5;
poly *invh = &x3, *h = &x3;

PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(f, g, seed);

PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_inv(invf_mod3, f);
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(sk, f);
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(sk + NTRU_PACK_TRINARY_BYTES, invf_mod3);

/* Lift coeffs of f and g from Z_p to Z_q */
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(f);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(g);


/* g = 3*g */
for (i = 0; i < NTRU_N; i++) {
g->coeffs[i] = 3 * g->coeffs[i];
}

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(gf, g, f);

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_inv(invgf, gf);

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(tmp, invgf, f);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(invh, tmp, f);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh);

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(tmp, invgf, g);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(h, tmp, g);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(pk, h);
}


void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_enc(unsigned char *c,
const poly *r,
const poly *m,
const unsigned char *pk) {
int i;
poly x1, x2;
poly *h = &x1, *liftm = &x1;
poly *ct = &x2;

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(h, pk);

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(ct, r, h);

PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(liftm, m);
for (i = 0; i < NTRU_N; i++) {
ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i];
}

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(c, ct);
}

int PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(unsigned char *rm,
const unsigned char *ciphertext,
const unsigned char *secretkey) {
int i;
int fail;
poly x1, x2, x3, x4;

poly *c = &x1, *f = &x2, *cf = &x3;
poly *mf = &x2, *finv3 = &x3, *m = &x4;
poly *liftm = &x2, *invh = &x3, *r = &x4;
poly *b = &x1;

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(c, ciphertext);
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(f, secretkey);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(f);

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(cf, c, f);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3(mf, cf);

PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(finv3, secretkey + NTRU_PACK_TRINARY_BYTES);
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_mul(m, mf, finv3);
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);

fail = 0;

/* Check that the unused bits of the last byte of the ciphertext are zero */
fail |= owcpa_check_ciphertext(ciphertext);

/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
/* (m can take any value in S3 in NTRU_HRSS) */
fail |= owcpa_check_m(m);

/* b = c - Lift(m) mod (q, x^n - 1) */
PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(liftm, m);
for (i = 0; i < NTRU_N; i++) {
b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i];
}

/* r = b / h mod (q, Phi_n) */
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(invh, secretkey + 2 * NTRU_PACK_TRINARY_BYTES);
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(r, b, invh);

/* NOTE: Our definition of r as b/h mod (q, Phi_n) follows Figure 4 of */
/* [Sch18] https://eprint.iacr.org/2018/1174/20181203:032458. */
/* This differs from Figure 10 of Saito--Xagawa--Yamakawa */
/* [SXY17] https://eprint.iacr.org/2017/1005/20180516:055500 */
/* where r gets a final reduction modulo p. */
/* We need this change to use Proposition 1 of [Sch18]. */

/* Proposition 1 of [Sch18] shows that re-encryption with (r,m) yields c. */
/* if and only if fail==0 after the following call to owcpa_check_r */
/* The procedure given in Fig. 8 of [Sch18] can be skipped because we have */
/* c(1) = 0 due to the use of poly_Rq_sum_zero_{to,from}bytes. */
fail |= owcpa_check_r(r);

PQCLEAN_NTRUHPS2048509_AVX2_poly_trinary_Zq_to_Z3(r);
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm, r);

return fail;
}

+ 0
- 19
src/kem/ntru/ntruhps2048509/avx2/owcpa.h View File

@@ -1,19 +0,0 @@
#ifndef OWCPA_H
#define OWCPA_H

#include "params.h"
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_keypair(unsigned char *pk,
unsigned char *sk,
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]);

void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_enc(unsigned char *c,
const poly *r,
const poly *m,
const unsigned char *pk);

int PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(unsigned char *rm,
const unsigned char *ciphertext,
const unsigned char *secretkey);
#endif

+ 0
- 46
src/kem/ntru/ntruhps2048509/avx2/pack3.c View File

@@ -1,46 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_MSGBYTES], const poly *a) {
int i;
unsigned char c;
int j;

for (i = 0; i < NTRU_PACK_DEG / 5; i++) {
c = a->coeffs[5 * i + 4] & 255;
c = (3 * c + a->coeffs[5 * i + 3]) & 255;
c = (3 * c + a->coeffs[5 * i + 2]) & 255;
c = (3 * c + a->coeffs[5 * i + 1]) & 255;
c = (3 * c + a->coeffs[5 * i + 0]) & 255;
msg[i] = c;
}
i = NTRU_PACK_DEG / 5;
c = 0;
for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) {
c = (3 * c + a->coeffs[5 * i + j]) & 255;
}
msg[i] = c;
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_OWCPA_MSGBYTES]) {
int i;
unsigned char c;
int j;

for (i = 0; i < NTRU_PACK_DEG / 5; i++) {
c = msg[i];
r->coeffs[5 * i + 0] = c;
r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3
r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2
r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3
r->coeffs[5 * i + 4] = c * 203 >> 14; // etc.
}
i = NTRU_PACK_DEG / 5;
c = msg[i];
for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) {
r->coeffs[5 * i + j] = c;
c = c * 171 >> 9;
}
r->coeffs[NTRU_N - 1] = 0;
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n(r);
}


+ 0
- 93
src/kem/ntru/ntruhps2048509/avx2/packq.c View File

@@ -1,93 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a) {
int i, j;
uint16_t t[8];

for (i = 0; i < NTRU_PACK_DEG / 8; i++) {
for (j = 0; j < 8; j++) {
t[j] = MODQ(a->coeffs[8 * i + j]);
}

r[11 * i + 0] = (unsigned char) ( t[0] & 0xff);
r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3));
r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6));
r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff);
r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1));
r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4));
r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7));
r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff);
r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2));
r[11 * i + 9] = (unsigned char) ((t[6] >> 6) | ((t[7] & 0x07) << 5));
r[11 * i + 10] = (unsigned char) ((t[7] >> 3));
}

for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) {
t[j] = MODQ(a->coeffs[8 * i + j]);
}
for (; j < 8; j++) {
t[j] = 0;
}

switch (NTRU_PACK_DEG & 0x07) {
// cases 0 and 6 are impossible since 2 generates (Z/n)* and
// p mod 8 in {1, 7} implies that 2 is a quadratic residue.
case 4:
r[11 * i + 0] = (unsigned char) (t[0] & 0xff);
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3);
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6);
r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff;
r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1);
r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4);
break;
case 2:
r[11 * i + 0] = (unsigned char) (t[0] & 0xff);
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3);
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6);
break;
}
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a) {
int i;
for (i = 0; i < NTRU_PACK_DEG / 8; i++) {
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10);
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7);
r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4);
r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9);
r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6);
r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3);
}
switch (NTRU_PACK_DEG & 0x07) {
// cases 0 and 6 are impossible since 2 generates (Z/n)* and
// p mod 8 in {1, 7} implies that 2 is a quadratic residue.
case 4:
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10);
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7);
break;
case 2:
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
break;
}
r->coeffs[NTRU_N - 1] = 0;
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) {
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(r, a);
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a) {
int i;
PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(r, a);

/* Set r[n-1] so that the sum of coefficients is zero mod q */
r->coeffs[NTRU_N - 1] = 0;
for (i = 0; i < NTRU_PACK_DEG; i++) {
r->coeffs[NTRU_N - 1] -= r->coeffs[i];
}
}

+ 0
- 37
src/kem/ntru/ntruhps2048509/avx2/params.h View File

@@ -1,37 +0,0 @@
#ifndef PARAMS_H
#define PARAMS_H

#define NTRU_HPS
#define NTRU_N 509
#define NTRU_LOGQ 11


/* Do not modify below this line */

#define PAD32(X) ((((X) + 31)/32)*32)

#define NTRU_Q (1 << NTRU_LOGQ)
#define NTRU_WEIGHT (NTRU_Q/8 - 2)

#define NTRU_SEEDBYTES 32
#define NTRU_PRFKEYBYTES 32
#define NTRU_SHAREDKEYBYTES 32

#define NTRU_SAMPLE_IID_BYTES (NTRU_N-1)
#define NTRU_SAMPLE_FT_BYTES ((30*(NTRU_N-1)+7)/8)
#define NTRU_SAMPLE_FG_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES)
#define NTRU_SAMPLE_RM_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES)

#define NTRU_PACK_DEG (NTRU_N-1)
#define NTRU_PACK_TRINARY_BYTES ((NTRU_PACK_DEG+4)/5)

#define NTRU_OWCPA_MSGBYTES (2*NTRU_PACK_TRINARY_BYTES)
#define NTRU_OWCPA_PUBLICKEYBYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8)
#define NTRU_OWCPA_SECRETKEYBYTES (2*NTRU_PACK_TRINARY_BYTES + NTRU_OWCPA_PUBLICKEYBYTES)
#define NTRU_OWCPA_BYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8)

#define NTRU_PUBLICKEYBYTES (NTRU_OWCPA_PUBLICKEYBYTES)
#define NTRU_SECRETKEYBYTES (NTRU_OWCPA_SECRETKEYBYTES + NTRU_PRFKEYBYTES)
#define NTRU_CIPHERTEXTBYTES (NTRU_OWCPA_BYTES)

#endif

+ 0
- 75
src/kem/ntru/ntruhps2048509/avx2/poly.c View File

@@ -1,75 +0,0 @@
#include "poly.h"

/* Map {0, 1, 2} -> {0,1,q-1} in place */
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = r->coeffs[i] | ((-(r->coeffs[i] >> 1)) & (NTRU_Q - 1));
}
}

/* Map {0, 1, q-1} -> {0,1,2} in place */
void PQCLEAN_NTRUHPS2048509_AVX2_poly_trinary_Zq_to_Z3(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = MODQ(r->coeffs[i]);
r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1)));
}
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b) {
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, a, b);
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n(r);
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b) {
int i;

/* Our S3 multiplications do not overflow mod q, */
/* so we can re-purpose PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul, as long as we */
/* follow with an explicit reduction mod q. */
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, a, b);
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = MODQ(r->coeffs[i]);
}
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n(r);
}

static void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) {

int i;
poly b, c;
poly s;

// for 0..4
// ai = ai * (2 - a*ai) mod q
for (i = 0; i < NTRU_N; i++) {
b.coeffs[i] = -(a->coeffs[i]);
}

for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = ai->coeffs[i];
}

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, r, &b);
c.coeffs[0] += 2; // c = 2 - a*ai
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&s, &c, r); // s = ai*c

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, &s, &b);
c.coeffs[0] += 2; // c = 2 - a*s
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, r, &b);
c.coeffs[0] += 2; // c = 2 - a*r
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&s, &c, r); // s = r*c

PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(&c, &s, &b);
c.coeffs[0] += 2; // c = 2 - a*s
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_inv(poly *r, const poly *a) {
poly ai2;
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv(&ai2, a);
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv_to_Rq_inv(r, &ai2, a);
}

+ 0
- 41
src/kem/ntru/ntruhps2048509/avx2/poly.h View File

@@ -1,41 +0,0 @@
#ifndef POLY_H
#define POLY_H

#include <immintrin.h>
#include <stdint.h>

#include "params.h"

#define MODQ(X) ((X) & (NTRU_Q-1))

typedef union { /* align to 32 byte boundary for vmovdqa */
uint16_t coeffs[PAD32(NTRU_N)];
__m256i coeffs_x16[PAD32(NTRU_N) / 16];
} poly;

void PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n(poly *r);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n(poly *r);

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a);

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a);

void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_PACK_TRINARY_BYTES], const poly *a);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_PACK_TRINARY_BYTES]);

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3(poly *r, const poly *a);

void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_inv(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_inv(poly *r, const poly *a);

void PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(poly *r);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_trinary_Zq_to_Z3(poly *r);

#endif

+ 0
- 11
src/kem/ntru/ntruhps2048509/avx2/poly_lift.c View File

@@ -1,11 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_AVX2_poly_lift(poly *r, const poly *a) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = a->coeffs[i];
}
PQCLEAN_NTRUHPS2048509_AVX2_poly_Z3_to_Zq(r);
}



+ 0
- 676
src/kem/ntru/ntruhps2048509/avx2/poly_mod_3_Phi_n.s View File

@@ -1,676 +0,0 @@
.data
.p2align 5
mask_ff:
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
mask_f:
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
mask_3:
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n:
_PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_3_Phi_n:
vmovdqa 992(%rdi), %ymm0
vpermq $3, %ymm0, %ymm0
vpslld $17, %ymm0, %ymm0
vpsrld $16, %ymm0, %ymm1
vpor %ymm0, %ymm1, %ymm0
vbroadcastss %xmm0, %ymm0
vpaddw 0(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 0(%rdi)
vpaddw 32(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 32(%rdi)
vpaddw 64(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 64(%rdi)
vpaddw 96(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 96(%rdi)
vpaddw 128(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 128(%rdi)
vpaddw 160(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 160(%rdi)
vpaddw 192(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 192(%rdi)
vpaddw 224(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 224(%rdi)
vpaddw 256(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 256(%rdi)
vpaddw 288(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 288(%rdi)
vpaddw 320(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 320(%rdi)
vpaddw 352(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 352(%rdi)
vpaddw 384(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 384(%rdi)
vpaddw 416(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 416(%rdi)
vpaddw 448(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 448(%rdi)
vpaddw 480(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 480(%rdi)
vpaddw 512(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 512(%rdi)
vpaddw 544(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 544(%rdi)
vpaddw 576(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 576(%rdi)
vpaddw 608(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 608(%rdi)
vpaddw 640(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 640(%rdi)
vpaddw 672(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 672(%rdi)
vpaddw 704(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 704(%rdi)
vpaddw 736(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 736(%rdi)
vpaddw 768(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 768(%rdi)
vpaddw 800(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 800(%rdi)
vpaddw 832(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 832(%rdi)
vpaddw 864(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 864(%rdi)
vpaddw 896(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 896(%rdi)
vpaddw 928(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 928(%rdi)
vpaddw 960(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 960(%rdi)
vpaddw 992(%rdi), %ymm0, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 992(%rdi)
movw $0, 1018(%rdi)
movw $0, 1020(%rdi)
movw $0, 1022(%rdi)
ret

+ 0
- 80
src/kem/ntru/ntruhps2048509/avx2/poly_mod_q_Phi_n.s View File

@@ -1,80 +0,0 @@
.data
.p2align 5
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n
PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n:
_PQCLEAN_NTRUHPS2048509_AVX2_poly_mod_q_Phi_n:
vmovdqa 992(%rdi), %ymm0
vpermq $3, %ymm0, %ymm0
vpslld $16, %ymm0, %ymm0
vpsrld $16, %ymm0, %ymm1
vpor %ymm0, %ymm1, %ymm0
vbroadcastss %xmm0, %ymm0
vxorpd %ymm1, %ymm1, %ymm1
vpsubw %ymm0, %ymm1, %ymm0
vpaddw 0(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 0(%rdi)
vpaddw 32(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 32(%rdi)
vpaddw 64(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 64(%rdi)
vpaddw 96(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 96(%rdi)
vpaddw 128(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 128(%rdi)
vpaddw 160(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 160(%rdi)
vpaddw 192(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 192(%rdi)
vpaddw 224(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 224(%rdi)
vpaddw 256(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 256(%rdi)
vpaddw 288(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 288(%rdi)
vpaddw 320(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 320(%rdi)
vpaddw 352(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 352(%rdi)
vpaddw 384(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 384(%rdi)
vpaddw 416(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 416(%rdi)
vpaddw 448(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 448(%rdi)
vpaddw 480(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 480(%rdi)
vpaddw 512(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 512(%rdi)
vpaddw 544(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 544(%rdi)
vpaddw 576(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 576(%rdi)
vpaddw 608(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 608(%rdi)
vpaddw 640(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 640(%rdi)
vpaddw 672(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 672(%rdi)
vpaddw 704(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 704(%rdi)
vpaddw 736(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 736(%rdi)
vpaddw 768(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 768(%rdi)
vpaddw 800(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 800(%rdi)
vpaddw 832(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 832(%rdi)
vpaddw 864(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 864(%rdi)
vpaddw 896(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 896(%rdi)
vpaddw 928(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 928(%rdi)
vpaddw 960(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 960(%rdi)
vpaddw 992(%rdi), %ymm0, %ymm1
vmovdqa %ymm1, 992(%rdi)
ret

+ 0
- 80
src/kem/ntru/ntruhps2048509/avx2/poly_r2_inv.c View File

@@ -1,80 +0,0 @@
#include <immintrin.h>

#include "poly_r2_inv.h"
#include "poly.h"

// Using pdep/pext for these two functions is faster but not a lot since they work on uint64_t which means
// we can only do 4 coefficients at a time. Per byte (where we store 8 coefficients) we will thus need 2 pdeps/pexts
// and an additional shift. In the case of tobytes we also need a logical or.
// On AMD Ryzen pdep/pext are quite slow and the naive solution (looping through and setting each bit individually)
// is preferred.
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_tobytes(unsigned char *out, const poly *a) {
// Since pext works on a uint64_t we view the coefficient pointer as a 64-bit pointer
// so that we can extract 4 coefficient at a time. It also makes arithmetic a little easier.
uint64_t *coeff_pointer = (void *) a->coeffs;

int i;
for (i = 0; i < 63; i++) {
out[i] = _pext_u64(coeff_pointer[2 * i], 0x1000100010001);
out[i] |= _pext_u64(coeff_pointer[2 * i + 1], 0x1000100010001) << 4;
}
out[i] = _pext_u64(coeff_pointer[2 * 63], 0x1000100010001);
out[i] |= _pext_u64(coeff_pointer[2 * 63 + 1], 0x1) << 4;
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_frombytes(poly *a, const unsigned char *in) {
// Since pdep results in a uint64_t we view the coefficient pointer as a 64-bit pointer
// so that we can store 4 coefficient at a time. It also makes arithmetic a little easier.
uint64_t *coeff_pointer = (void *) a->coeffs;

int i;
for (i = 0; i < 63; i++) {
coeff_pointer[2 * i] = _pdep_u64(in[i], 0x1000100010001);
coeff_pointer[2 * i + 1] = _pdep_u64(in[i] >> 4, 0x1000100010001);
}
// From the last byte we only want 5 bits (since we have 509 total, not 512).
coeff_pointer[2 * 63] = _pdep_u64(in[i], 0x1000100010001);
coeff_pointer[2 * 63 + 1] = _pdep_u64(in[i] >> 4, 0x1);
}

void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_inv(poly *r, const poly *a) {
union {
unsigned char s[64];
__m256i s_x32[2];
} squares[13];
#define s(x) squares[(x)].s

// This relies on the following addition chain:
// 1, 2, 3, 6, 12, 15, 30, 60, 63, 126, 252, 504, 507

PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_tobytes(s(0), a); // TODO alignment

PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(s(1), s(0));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(1), s(1), s(0));
PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(s(2), s(1));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(2), s(2), s(0));
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(3), s(2));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(3), s(3), s(2));
PQCLEAN_NTRUHPS2048509_AVX2_square_6_509(s(4), s(3));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(4), s(4), s(3));
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(5), s(4));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(5), s(5), s(2));
PQCLEAN_NTRUHPS2048509_AVX2_square_15_509(s(6), s(5));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(6), s(6), s(5));
PQCLEAN_NTRUHPS2048509_AVX2_square_30_509(s(7), s(6));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(7), s(7), s(6));
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(8), s(7));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(8), s(8), s(2));
PQCLEAN_NTRUHPS2048509_AVX2_square_63_509(s(9), s(8));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(9), s(9), s(8));
PQCLEAN_NTRUHPS2048509_AVX2_square_126_509(s(10), s(9));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(10), s(10), s(9));
PQCLEAN_NTRUHPS2048509_AVX2_square_252_509(s(11), s(10));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(11), s(11), s(10));
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(s(12), s(11));
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(s(12), s(12), s(2));
PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(s(0), s(12));

PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_frombytes(r, s(0));
#undef s
}

+ 0
- 20
src/kem/ntru/ntruhps2048509/avx2/poly_r2_inv.h View File

@@ -1,20 +0,0 @@
#ifndef POLY_R2_INV_H
#define POLY_R2_INV_H

#include "poly.h"

void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_tobytes(unsigned char *out, const poly *a);
void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_frombytes(poly *a, const unsigned char *in);

extern void PQCLEAN_NTRUHPS2048509_AVX2_square_1_509(unsigned char *out, const unsigned char *a);
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_3_509(unsigned char *out, const unsigned char *a);
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_6_509(unsigned char *out, const unsigned char *a);
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_15_509(unsigned char *out, const unsigned char *a);
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_30_509(unsigned char *out, const unsigned char *a);
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_63_509(unsigned char *out, const unsigned char *a);
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_126_509(unsigned char *out, const unsigned char *a);
extern void PQCLEAN_NTRUHPS2048509_AVX2_square_252_509(unsigned char *out, const unsigned char *a);

extern void PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul(unsigned char *out, const unsigned char *a,
const unsigned char *b);
#endif

+ 0
- 285
src/kem/ntru/ntruhps2048509/avx2/poly_r2_mul.s View File

@@ -1,285 +0,0 @@
.data
.p2align 5
mask1100:
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
mask0110:
.word 0
.word 0
.word 0
.word 0
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 0
.word 0
.word 0
.word 0
mask0011:
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
mask1000:
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 0
.word 65535
.word 65535
.word 65535
.word 65535
mask0111:
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 0
.word 0
.word 0
.word 0
low253:
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 65535
.word 8191
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul
PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul:
_PQCLEAN_NTRUHPS2048509_AVX2_poly_R2_mul:
vmovdqa 0(%rsi), %ymm0
vmovdqa 32(%rsi), %ymm1
vmovdqa 0(%rdx), %ymm3
vmovdqa 32(%rdx), %ymm4
vpxor %ymm0, %ymm1, %ymm6
vpxor %ymm3, %ymm4, %ymm7
vextracti128 $1, %ymm0, %xmm11
vextracti128 $1, %ymm3, %xmm12
vpclmulqdq $1, %xmm11, %xmm12, %xmm5
vpclmulqdq $16, %xmm11, %xmm12, %xmm14
vpclmulqdq $17, %xmm11, %xmm12, %xmm15
vpxor %xmm5, %xmm14, %xmm14
vpclmulqdq $0, %xmm11, %xmm12, %xmm5
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm5, %ymm5
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm5, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm5
vpxor %xmm0, %xmm11, %xmm11
vpxor %xmm3, %xmm12, %xmm12
vpclmulqdq $1, %xmm11, %xmm12, %xmm13
vpclmulqdq $16, %xmm11, %xmm12, %xmm14
vpclmulqdq $17, %xmm11, %xmm12, %xmm15
vpxor %xmm13, %xmm14, %xmm14
vpclmulqdq $0, %xmm11, %xmm12, %xmm13
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm13, %ymm13
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm13, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm13
vpclmulqdq $1, %xmm0, %xmm3, %xmm2
vpclmulqdq $16, %xmm0, %xmm3, %xmm14
vpclmulqdq $17, %xmm0, %xmm3, %xmm15
vpxor %xmm2, %xmm14, %xmm14
vpclmulqdq $0, %xmm0, %xmm3, %xmm2
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm2, %ymm2
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm2, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm2
vpxor %ymm13, %ymm5, %ymm13
vpxor %ymm13, %ymm2, %ymm13
vpxor %ymm11, %ymm11, %ymm11
vextracti128 $1, %ymm13, %xmm11
vpxor %ymm5, %ymm11, %ymm5
vpxor %ymm11, %ymm11, %ymm11
vinserti128 $1, %xmm13, %ymm11, %ymm11
vpxor %ymm11, %ymm2, %ymm2
vextracti128 $1, %ymm1, %xmm11
vextracti128 $1, %ymm4, %xmm12
vpclmulqdq $1, %xmm11, %xmm12, %xmm9
vpclmulqdq $16, %xmm11, %xmm12, %xmm14
vpclmulqdq $17, %xmm11, %xmm12, %xmm15
vpxor %xmm9, %xmm14, %xmm14
vpclmulqdq $0, %xmm11, %xmm12, %xmm9
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm9, %ymm9
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm9
vpxor %xmm1, %xmm11, %xmm11
vpxor %xmm4, %xmm12, %xmm12
vpclmulqdq $1, %xmm11, %xmm12, %xmm13
vpclmulqdq $16, %xmm11, %xmm12, %xmm14
vpclmulqdq $17, %xmm11, %xmm12, %xmm15
vpxor %xmm13, %xmm14, %xmm14
vpclmulqdq $0, %xmm11, %xmm12, %xmm13
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm13, %ymm13
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm13, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm13
vpclmulqdq $1, %xmm1, %xmm4, %xmm8
vpclmulqdq $16, %xmm1, %xmm4, %xmm14
vpclmulqdq $17, %xmm1, %xmm4, %xmm15
vpxor %xmm8, %xmm14, %xmm14
vpclmulqdq $0, %xmm1, %xmm4, %xmm8
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm8, %ymm8
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm8, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm8
vpxor %ymm13, %ymm9, %ymm13
vpxor %ymm13, %ymm8, %ymm13
vpxor %ymm11, %ymm11, %ymm11
vextracti128 $1, %ymm13, %xmm11
vpxor %ymm9, %ymm11, %ymm9
vpxor %ymm11, %ymm11, %ymm11
vinserti128 $1, %xmm13, %ymm11, %ymm11
vpxor %ymm11, %ymm8, %ymm8
vextracti128 $1, %ymm6, %xmm11
vextracti128 $1, %ymm7, %xmm12
vpclmulqdq $1, %xmm11, %xmm12, %xmm1
vpclmulqdq $16, %xmm11, %xmm12, %xmm14
vpclmulqdq $17, %xmm11, %xmm12, %xmm15
vpxor %xmm1, %xmm14, %xmm14
vpclmulqdq $0, %xmm11, %xmm12, %xmm1
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm1, %ymm1
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm1, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm1
vpxor %xmm6, %xmm11, %xmm11
vpxor %xmm7, %xmm12, %xmm12
vpclmulqdq $1, %xmm11, %xmm12, %xmm13
vpclmulqdq $16, %xmm11, %xmm12, %xmm14
vpclmulqdq $17, %xmm11, %xmm12, %xmm15
vpxor %xmm13, %xmm14, %xmm14
vpclmulqdq $0, %xmm11, %xmm12, %xmm13
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm13, %ymm13
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm13, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm13
vpclmulqdq $1, %xmm6, %xmm7, %xmm0
vpclmulqdq $16, %xmm6, %xmm7, %xmm14
vpclmulqdq $17, %xmm6, %xmm7, %xmm15
vpxor %xmm0, %xmm14, %xmm14
vpclmulqdq $0, %xmm6, %xmm7, %xmm0
vpermq $16, %ymm14, %ymm14
vinserti128 $1, %xmm15, %ymm15, %ymm15
vpand mask0011(%rip), %ymm0, %ymm0
vpand mask0110(%rip), %ymm14, %ymm14
vpand mask1100(%rip), %ymm15, %ymm15
vpxor %ymm0, %ymm14, %ymm14
vpxor %ymm14, %ymm15, %ymm0
vpxor %ymm13, %ymm1, %ymm13
vpxor %ymm13, %ymm0, %ymm13
vpxor %ymm11, %ymm11, %ymm11
vextracti128 $1, %ymm13, %xmm11
vpxor %ymm1, %ymm11, %ymm1
vpxor %ymm11, %ymm11, %ymm11
vinserti128 $1, %xmm13, %ymm11, %ymm11
vpxor %ymm11, %ymm0, %ymm0
vpxor %ymm0, %ymm2, %ymm0
vpxor %ymm0, %ymm8, %ymm0
vpxor %ymm1, %ymm5, %ymm1
vpxor %ymm1, %ymm9, %ymm1
vpxor %ymm0, %ymm5, %ymm5
vpxor %ymm1, %ymm8, %ymm8
vpand mask1000(%rip), %ymm5, %ymm13
vpand mask0111(%rip), %ymm8, %ymm12
vpxor %ymm12, %ymm13, %ymm12
vpsrlq $61, %ymm12, %ymm12
vpermq $147, %ymm12, %ymm12
vpxor %ymm12, %ymm2, %ymm2
vpsllq $3, %ymm8, %ymm12
vpxor %ymm12, %ymm2, %ymm2
vpand mask1000(%rip), %ymm8, %ymm13
vpand mask0111(%rip), %ymm9, %ymm12
vpxor %ymm12, %ymm13, %ymm12
vpsrlq $61, %ymm12, %ymm12
vpermq $147, %ymm12, %ymm12
vpxor %ymm12, %ymm5, %ymm5
vpsllq $3, %ymm9, %ymm12
vpxor %ymm12, %ymm5, %ymm5
vpand low253(%rip), %ymm5, %ymm5
vmovdqa %ymm2, 0(%rdi)
vmovdqa %ymm5, 32(%rdi)
ret

+ 0
- 5520
src/kem/ntru/ntruhps2048509/avx2/poly_rq_mul.s
File diff suppressed because it is too large
View File


+ 0
- 840
src/kem/ntru/ntruhps2048509/avx2/poly_rq_to_s3.s View File

@@ -1,840 +0,0 @@
.data
.p2align 5
mask_modq:
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
.word 2047
mask_ff:
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
mask_f:
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
mask_3:
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3
.global _PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3
PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3:
_PQCLEAN_NTRUHPS2048509_AVX2_poly_Rq_to_S3:
vmovdqa mask_modq(%rip), %ymm6
vmovdqa 992(%rsi), %ymm5
vpand %ymm6, %ymm5, %ymm5
vpermq $3, %ymm5, %ymm5
vpslld $16, %ymm5, %ymm1
vpsrld $16, %ymm1, %ymm5
vpor %ymm5, %ymm1, %ymm5
vbroadcastss %xmm5, %ymm5
vpsrlw $10, %ymm5, %ymm1
vpaddw %ymm5, %ymm1, %ymm5
vpsrlw $8, %ymm5, %ymm1
vpand mask_ff(%rip), %ymm5, %ymm5
vpaddw %ymm1, %ymm5, %ymm1
vpand mask_f(%rip), %ymm1, %ymm5
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm5, %ymm1
vpand mask_3(%rip), %ymm1, %ymm5
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm5, %ymm1
vpand mask_3(%rip), %ymm1, %ymm5
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm5, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm5
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm5, %ymm1
vpsllw $1, %ymm1, %ymm5
vmovdqa 0(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 0(%rdi)
vmovdqa 32(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 32(%rdi)
vmovdqa 64(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 64(%rdi)
vmovdqa 96(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 96(%rdi)
vmovdqa 128(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 128(%rdi)
vmovdqa 160(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 160(%rdi)
vmovdqa 192(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 192(%rdi)
vmovdqa 224(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 224(%rdi)
vmovdqa 256(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 256(%rdi)
vmovdqa 288(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 288(%rdi)
vmovdqa 320(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 320(%rdi)
vmovdqa 352(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 352(%rdi)
vmovdqa 384(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 384(%rdi)
vmovdqa 416(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 416(%rdi)
vmovdqa 448(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 448(%rdi)
vmovdqa 480(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 480(%rdi)
vmovdqa 512(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 512(%rdi)
vmovdqa 544(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 544(%rdi)
vmovdqa 576(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 576(%rdi)
vmovdqa 608(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 608(%rdi)
vmovdqa 640(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 640(%rdi)
vmovdqa 672(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 672(%rdi)
vmovdqa 704(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 704(%rdi)
vmovdqa 736(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 736(%rdi)
vmovdqa 768(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 768(%rdi)
vmovdqa 800(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 800(%rdi)
vmovdqa 832(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 832(%rdi)
vmovdqa 864(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 864(%rdi)
vmovdqa 896(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 896(%rdi)
vmovdqa 928(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 928(%rdi)
vmovdqa 960(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 960(%rdi)
vmovdqa 992(%rsi), %ymm0
vpand %ymm6, %ymm0, %ymm0
vpsrlw $10, %ymm0, %ymm1
vpaddw %ymm0, %ymm1, %ymm0
vpaddw %ymm0, %ymm5, %ymm0
vpsrlw $8, %ymm0, %ymm1
vpand mask_ff(%rip), %ymm0, %ymm0
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_f(%rip), %ymm1, %ymm0
vpsrlw $4, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpand mask_3(%rip), %ymm1, %ymm0
vpsrlw $2, %ymm1, %ymm1
vpaddw %ymm1, %ymm0, %ymm1
vpsubw mask_3(%rip), %ymm1, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm0
vpand %ymm15, %ymm1, %ymm14
vpxor %ymm14, %ymm0, %ymm1
vmovdqa %ymm1, 992(%rdi)
ret

+ 0
- 463
src/kem/ntru/ntruhps2048509/avx2/poly_s3_inv.c View File

@@ -1,463 +0,0 @@
#include "poly.h"

#include <immintrin.h>

typedef signed char small;

#define p 508
#define ppad 512
#define numvec 2

typedef __m256i vec256;

/*
This code stores 512-coeff poly as vec256[2].
Order of 256 coefficients in each vec256
is optimized in light of costs of vector instructions:
0,4,...,252 in 64-bit word;
1,5,...,253 in 64-bit word;
2,6,...,254 in 64-bit word;
3,7,...,255 in 64-bit word.
*/

static inline void vec256_frombits(vec256 *v, const small *b) {
int i;

for (i = 0; i < numvec; ++i) {
vec256 b0 = _mm256_loadu_si256((vec256 *) b);
b += 32; /* 0,1,...,31 */
vec256 b1 = _mm256_loadu_si256((vec256 *) b);
b += 32; /* 32,33,... */
vec256 b2 = _mm256_loadu_si256((vec256 *) b);
b += 32;
vec256 b3 = _mm256_loadu_si256((vec256 *) b);
b += 32;
vec256 b4 = _mm256_loadu_si256((vec256 *) b);
b += 32;
vec256 b5 = _mm256_loadu_si256((vec256 *) b);
b += 32;
vec256 b6 = _mm256_loadu_si256((vec256 *) b);
b += 32;
vec256 b7 = _mm256_loadu_si256((vec256 *) b);
b += 32;

vec256 c0 = _mm256_unpacklo_epi32(b0, b1); /* 0 1 2 3 32 33 34 35 4 5 6 7 36 37 38 39 ... 55 */
vec256 c1 = _mm256_unpackhi_epi32(b0, b1); /* 8 9 10 11 40 41 42 43 ... 63 */
vec256 c2 = _mm256_unpacklo_epi32(b2, b3);
vec256 c3 = _mm256_unpackhi_epi32(b2, b3);
vec256 c4 = _mm256_unpacklo_epi32(b4, b5);
vec256 c5 = _mm256_unpackhi_epi32(b4, b5);
vec256 c6 = _mm256_unpacklo_epi32(b6, b7);
vec256 c7 = _mm256_unpackhi_epi32(b6, b7);

vec256 d0 = c0 | _mm256_slli_epi32(c1, 2); /* 0 8, 1 9, 2 10, 3 11, 32 40, 33 41, ..., 55 63 */
vec256 d2 = c2 | _mm256_slli_epi32(c3, 2);
vec256 d4 = c4 | _mm256_slli_epi32(c5, 2);
vec256 d6 = c6 | _mm256_slli_epi32(c7, 2);

vec256 e0 = _mm256_unpacklo_epi64(d0, d2);
vec256 e2 = _mm256_unpackhi_epi64(d0, d2);
vec256 e4 = _mm256_unpacklo_epi64(d4, d6);
vec256 e6 = _mm256_unpackhi_epi64(d4, d6);

vec256 f0 = e0 | _mm256_slli_epi32(e2, 1);
vec256 f4 = e4 | _mm256_slli_epi32(e6, 1);

vec256 g0 = _mm256_permute2x128_si256(f0, f4, 0x20);
vec256 g4 = _mm256_permute2x128_si256(f0, f4, 0x31);

vec256 h = g0 | _mm256_slli_epi32(g4, 4);

#define TRANSPOSE _mm256_set_epi8( 31,27,23,19, 30,26,22,18, 29,25,21,17, 28,24,20,16, 15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0 )
h = _mm256_shuffle_epi8(h, TRANSPOSE);
h = _mm256_permute4x64_epi64(h, 0xd8);
h = _mm256_shuffle_epi32(h, 0xd8);

*v++ = h;
}
}

static inline void vec256_tobits(const vec256 *v, small *b) {
int i;

for (i = 0; i < numvec; ++i) {
vec256 h = *v++;

h = _mm256_shuffle_epi32(h, 0xd8);
h = _mm256_permute4x64_epi64(h, 0xd8);
h = _mm256_shuffle_epi8(h, TRANSPOSE);

vec256 g0 = h & _mm256_set1_epi8(15);
vec256 g4 = _mm256_srli_epi32(h, 4) & _mm256_set1_epi8(15);

vec256 f0 = _mm256_permute2x128_si256(g0, g4, 0x20);
vec256 f4 = _mm256_permute2x128_si256(g0, g4, 0x31);

vec256 e0 = f0 & _mm256_set1_epi8(5);
vec256 e2 = _mm256_srli_epi32(f0, 1) & _mm256_set1_epi8(5);
vec256 e4 = f4 & _mm256_set1_epi8(5);
vec256 e6 = _mm256_srli_epi32(f4, 1) & _mm256_set1_epi8(5);

vec256 d0 = _mm256_unpacklo_epi32(e0, e2);
vec256 d2 = _mm256_unpackhi_epi32(e0, e2);
vec256 d4 = _mm256_unpacklo_epi32(e4, e6);
vec256 d6 = _mm256_unpackhi_epi32(e4, e6);

vec256 c0 = d0 & _mm256_set1_epi8(1);
vec256 c1 = _mm256_srli_epi32(d0, 2) & _mm256_set1_epi8(1);
vec256 c2 = d2 & _mm256_set1_epi8(1);
vec256 c3 = _mm256_srli_epi32(d2, 2) & _mm256_set1_epi8(1);
vec256 c4 = d4 & _mm256_set1_epi8(1);
vec256 c5 = _mm256_srli_epi32(d4, 2) & _mm256_set1_epi8(1);
vec256 c6 = d6 & _mm256_set1_epi8(1);
vec256 c7 = _mm256_srli_epi32(d6, 2) & _mm256_set1_epi8(1);

vec256 b0 = _mm256_unpacklo_epi64(c0, c1);
vec256 b1 = _mm256_unpackhi_epi64(c0, c1);
vec256 b2 = _mm256_unpacklo_epi64(c2, c3);
vec256 b3 = _mm256_unpackhi_epi64(c2, c3);
vec256 b4 = _mm256_unpacklo_epi64(c4, c5);
vec256 b5 = _mm256_unpackhi_epi64(c4, c5);
vec256 b6 = _mm256_unpacklo_epi64(c6, c7);
vec256 b7 = _mm256_unpackhi_epi64(c6, c7);

_mm256_storeu_si256((vec256 *) b, b0);
b += 32;
_mm256_storeu_si256((vec256 *) b, b1);
b += 32;
_mm256_storeu_si256((vec256 *) b, b2);
b += 32;
_mm256_storeu_si256((vec256 *) b, b3);
b += 32;
_mm256_storeu_si256((vec256 *) b, b4);
b += 32;
_mm256_storeu_si256((vec256 *) b, b5);
b += 32;
_mm256_storeu_si256((vec256 *) b, b6);
b += 32;
_mm256_storeu_si256((vec256 *) b, b7);
b += 32;
}
}

static void vec256_init(vec256 *G0, vec256 *G1, const small *s) {
int i;
small srev[ppad + (ppad - p)];
small si;
small g0[ppad];
small g1[ppad];

for (i = 0; i < p; ++i) {
srev[ppad - 1 - i] = s[i];
}
for (i = 0; i < ppad - p; ++i) {
srev[i] = 0;
}
for (i = p; i < ppad; ++i) {
srev[i + ppad - p] = 0;
}

for (i = 0; i < ppad; ++i) {
si = srev[i + ppad - p];
g0[i] = si & 1;
g1[i] = (si >> 1) & g0[i];
}

vec256_frombits(G0, g0);
vec256_frombits(G1, g1);
}

static void vec256_final(small *out, const vec256 *V0, const vec256 *V1) {
int i;
small v0[ppad];
small v1[ppad];
small v[ppad];
small vrev[ppad + (ppad - p)];

vec256_tobits(V0, v0);
vec256_tobits(V1, v1);

for (i = 0; i < ppad; ++i) {
v[i] = v0[i] + 2 * v1[i] - 4 * (v0[i] & v1[i]);
}

for (i = 0; i < ppad; ++i) {
vrev[i] = v[ppad - 1 - i];
}
for (i = ppad; i < ppad + (ppad - p); ++i) {
vrev[i] = 0;
}

for (i = 0; i < p; ++i) {
out[i] = vrev[i + ppad - p];
}
}

static inline int negative_mask(int x) {
return x >> 31;
}

static inline void vec256_swap(vec256 *f, vec256 *g, int len, vec256 mask) {
vec256 flip;
int i;

for (i = 0; i < len; ++i) {
flip = mask & (f[i] ^ g[i]);
f[i] ^= flip;
g[i] ^= flip;
}
}

static inline void vec256_scale(vec256 *f0, vec256 *f1, const vec256 c0, const vec256 c1) {
int i;

for (i = 0; i < numvec; ++i) {
vec256 f0i = f0[i];
vec256 f1i = f1[i];

f0i &= c0;
f1i ^= c1;
f1i &= f0i;

f0[i] = f0i;
f1[i] = f1i;
}
}

static inline void vec256_eliminate(vec256 *f0, vec256 *f1, vec256 *g0, vec256 *g1, int len, const vec256 c0, const vec256 c1) {
int i;

for (i = 0; i < len; ++i) {
vec256 f0i = f0[i];
vec256 f1i = f1[i];
vec256 g0i = g0[i];
vec256 g1i = g1[i];
vec256 t;

f0i &= c0;
f1i ^= c1;
f1i &= f0i;

t = g0i ^ f0i;
g0[i] = t | (g1i ^ f1i);
g1[i] = (g1i ^ f0i) & (f1i ^ t);
}
}

static inline int vec256_bit0mask(vec256 *f) {
return -(_mm_cvtsi128_si32(_mm256_castsi256_si128(f[0])) & 1);
}

static inline void vec256_divx_1(vec256 *f) {
vec256 f0 = f[0];

unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0));

low0 = low0 >> 1;

f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3);

f[0] = _mm256_permute4x64_epi64(f0, 0x39);
}

static inline void vec256_divx_2(vec256 *f) {
vec256 f0 = f[0];
vec256 f1 = f[1];

unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0));
unsigned long long low1 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f1));

low0 = (low0 >> 1) | (low1 << 63);
low1 = low1 >> 1;

f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3);
f1 = _mm256_blend_epi32(f1, _mm256_set_epi64x(0, 0, 0, low1), 0x3);

f[0] = _mm256_permute4x64_epi64(f0, 0x39);
f[1] = _mm256_permute4x64_epi64(f1, 0x39);
}

static inline void vec256_timesx_1(vec256 *f) {
vec256 f0 = _mm256_permute4x64_epi64(f[0], 0x93);

unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0));

low0 = low0 << 1;

f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3);

f[0] = f0;
}

static inline void vec256_timesx_2(vec256 *f) {
vec256 f0 = _mm256_permute4x64_epi64(f[0], 0x93);
vec256 f1 = _mm256_permute4x64_epi64(f[1], 0x93);

unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0));
unsigned long long low1 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f1));

low1 = (low1 << 1) | (low0 >> 63);
low0 = low0 << 1;

f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3);
f1 = _mm256_blend_epi32(f1, _mm256_set_epi64x(0, 0, 0, low1), 0x3);

f[0] = f0;
f[1] = f1;
}


static int __poly_S3_inv(unsigned char *outbytes, const unsigned char *inbytes) {
small *out = (void *) outbytes;
small *in = (void *) inbytes;
vec256 F0[numvec];
vec256 F1[numvec];
vec256 G0[numvec];
vec256 G1[numvec];
vec256 V0[numvec];
vec256 V1[numvec];
vec256 R0[numvec];
vec256 R1[numvec];
vec256 c0vec, c1vec;
int loop;
int c0, c1;
int minusdelta = -1;
int swapmask;
vec256 swapvec;

vec256_init(G0, G1, in);
F0[0] = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1);
F0[1] = _mm256_set_epi32(2147483647, -1, 2147483647, -1, 2147483647, -1, -1, -1);
F1[0] = _mm256_set1_epi32(0);
F1[1] = _mm256_set1_epi32(0);

V0[0] = _mm256_set1_epi32(0);
V1[0] = _mm256_set1_epi32(0);
V0[1] = _mm256_set1_epi32(0);
V1[1] = _mm256_set1_epi32(0);

R0[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, 1);
R1[0] = _mm256_set1_epi32(0);
R0[1] = _mm256_set1_epi32(0);
R1[1] = _mm256_set1_epi32(0);

for (loop = 256; loop > 0; --loop) {
vec256_timesx_1(V0);
vec256_timesx_1(V1);
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);

c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
c1 &= c0;

minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
minusdelta -= 1;

swapvec = _mm256_set1_epi32(swapmask);
vec256_swap(F0, G0, 2, swapvec);
vec256_swap(F1, G1, 2, swapvec);

c0vec = _mm256_set1_epi32(c0);
c1vec = _mm256_set1_epi32(c1);

vec256_eliminate(F0, F1, G0, G1, 2, c0vec, c1vec);
vec256_divx_2(G0);
vec256_divx_2(G1);

vec256_swap(V0, R0, 1, swapvec);
vec256_swap(V1, R1, 1, swapvec);
vec256_eliminate(V0, V1, R0, R1, 1, c0vec, c1vec);
}

for (loop = 503; loop > 0; --loop) {
vec256_timesx_2(V0);
vec256_timesx_2(V1);
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);

c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
c1 &= c0;

minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
minusdelta -= 1;

swapvec = _mm256_set1_epi32(swapmask);
vec256_swap(F0, G0, 2, swapvec);
vec256_swap(F1, G1, 2, swapvec);

c0vec = _mm256_set1_epi32(c0);
c1vec = _mm256_set1_epi32(c1);

vec256_eliminate(F0, F1, G0, G1, 2, c0vec, c1vec);
vec256_divx_2(G0);
vec256_divx_2(G1);

vec256_swap(V0, R0, 2, swapvec);
vec256_swap(V1, R1, 2, swapvec);
vec256_eliminate(V0, V1, R0, R1, 2, c0vec, c1vec);
}

for (loop = 256; loop > 0; --loop) {
vec256_timesx_2(V0);
vec256_timesx_2(V1);
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);

c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
c1 &= c0;

minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
minusdelta -= 1;

swapvec = _mm256_set1_epi32(swapmask);
vec256_swap(F0, G0, 1, swapvec);
vec256_swap(F1, G1, 1, swapvec);

c0vec = _mm256_set1_epi32(c0);
c1vec = _mm256_set1_epi32(c1);

vec256_eliminate(F0, F1, G0, G1, 1, c0vec, c1vec);
vec256_divx_1(G0);
vec256_divx_1(G1);

vec256_swap(V0, R0, 2, swapvec);
vec256_swap(V1, R1, 2, swapvec);
vec256_eliminate(V0, V1, R0, R1, 2, c0vec, c1vec);
}

c0vec = _mm256_set1_epi32(vec256_bit0mask(F0));
c1vec = _mm256_set1_epi32(vec256_bit0mask(F1));
vec256_scale(V0, V1, c0vec, c1vec);

vec256_final(out, V0, V1);
out[p] = negative_mask(minusdelta);
return 0;
}

// This code is based on crypto_core/invhrss701/faster from SUPERCOP. The code was written as a case study
// for the paper "Fast constant-time gcd computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
void PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_inv(poly *r_out, const poly *a) {
const unsigned char *in = (void *) a;
unsigned char *out = (void *) r_out;

small input[ppad];
small output[ppad];
int i;

/* XXX: obviously input/output format should be packed into bytes */

for (i = 0; i < p; ++i) {
small x = in[2 * i] & 3; /* 0 1 2 3 */
x += 1; /* 0 1 2 3 4 5 6, offset by 1 */
x &= (x - 3) >> 5; /* 0 1 2, offset by 1 */
input[i] = x - 1;
}
/* XXX: merge with vec256_init */

__poly_S3_inv((unsigned char *)output, (unsigned char *)input);

for (i = 0; i < p; ++i) {
out[2 * i] = (3 & output[i]) ^ ((3 & output[i]) >> 1);
out[2 * i + 1] = 0;
}
}

+ 0
- 45
src/kem/ntru/ntruhps2048509/avx2/sample.c View File

@@ -1,45 +0,0 @@
#include "sample.h"

void PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {

PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(f, uniformbytes);
PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(g, uniformbytes + NTRU_SAMPLE_IID_BYTES);
}

void PQCLEAN_NTRUHPS2048509_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]) {

PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(r, uniformbytes);
PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(m, uniformbytes + NTRU_SAMPLE_IID_BYTES);
}


void PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(poly *r, const unsigned char u[NTRU_SAMPLE_FT_BYTES]) {
// Assumes NTRU_SAMPLE_FT_BYTES = ceil(30*(n-1)/8)

int32_t s[NTRU_N - 1];
int i;

// Use 30 bits of u per word
for (i = 0; i < (NTRU_N - 1) / 4; i++) {
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
}

for (i = 0; i < NTRU_WEIGHT / 2; i++) {
s[i] |= 1;
}

for (i = NTRU_WEIGHT / 2; i < NTRU_WEIGHT; i++) {
s[i] |= 2;
}

PQCLEAN_NTRUHPS2048509_AVX2_crypto_sort_int32(s, NTRU_N - 1);

for (i = 0; i < NTRU_N - 1; i++) {
r->coeffs[i] = ((uint16_t) (s[i] & 3));
}

r->coeffs[NTRU_N - 1] = 0;
}

+ 0
- 17
src/kem/ntru/ntruhps2048509/avx2/sample.h View File

@@ -1,17 +0,0 @@
#ifndef SAMPLE_H
#define SAMPLE_H

#include "params.h"
#include "poly.h"

#include "crypto_sort_int32.h"

void PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
void PQCLEAN_NTRUHPS2048509_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);

void PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]);

void PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_FT_BYTES]);


#endif

+ 0
- 21
src/kem/ntru/ntruhps2048509/avx2/sample_iid.c View File

@@ -1,21 +0,0 @@
#include <immintrin.h>

#include "sample.h"

extern void PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid(poly *r, const unsigned char uniformbytes[PAD32(NTRU_SAMPLE_IID_BYTES)]);

void PQCLEAN_NTRUHPS2048509_AVX2_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) {
int i;
union { /* align to 32 byte boundary for vmovdqa */
unsigned char b[PAD32(NTRU_SAMPLE_IID_BYTES)];
__m256i b_x32[PAD32(NTRU_SAMPLE_IID_BYTES) / 32];
} buffer;

for (i = 0; i < NTRU_SAMPLE_IID_BYTES; i++) {
buffer.b[i] = uniformbytes[i];
}
for (i = NTRU_SAMPLE_IID_BYTES; i < PAD32(NTRU_SAMPLE_IID_BYTES); i++) {
buffer.b[i] = 0;
}
PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid(r, buffer.b);
}

+ 0
- 2854
src/kem/ntru/ntruhps2048509/avx2/square_126_509_shufbytes.s
File diff suppressed because it is too large
View File


+ 0
- 5125
src/kem/ntru/ntruhps2048509/avx2/square_15_509_shufbytes.s
File diff suppressed because it is too large
View File


+ 0
- 109
src/kem/ntru/ntruhps2048509/avx2/square_1_509_patience.s View File

@@ -1,109 +0,0 @@
.data
.p2align 5
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_square_1_509
.global _PQCLEAN_NTRUHPS2048509_AVX2_square_1_509
PQCLEAN_NTRUHPS2048509_AVX2_square_1_509:
_PQCLEAN_NTRUHPS2048509_AVX2_square_1_509:
push %r15
push %r14
push %r13
push %r12
push %rbx
push %rbp
mov 0(%rsi), %r11
mov %r11, %r10
and $-0x1, %r10
mov $0x5555555555555555, %rbp
pdep %rbp, %r10, %r10
mov %r10, 0(%rdi)
mov $0xffffffff00000000, %rbx
pext %rbx, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 8(%rdi)
mov 8(%rsi), %r11
mov %r11, %r10
and $-0x1, %r10
pdep %rbp, %r10, %r10
mov %r10, 16(%rdi)
pext %rbx, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 24(%rdi)
mov 16(%rsi), %r11
mov %r11, %r10
and $-0x1, %r10
pdep %rbp, %r10, %r10
mov %r10, 32(%rdi)
pext %rbx, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 40(%rdi)
mov 24(%rsi), %r11
mov %r11, %r10
rol $2, %r10
and $0x2, %r10
xor %r10, 0(%rdi)
mov %r11, %r10
and $-0x1, %r10
pdep %rbp, %r10, %r10
mov %r10, 48(%rdi)
mov $0x7fffffff00000000, %r12
pext %r12, %r11, %r10
mov $0x1555555555555555, %r13
pdep %r13, %r10, %r10
mov %r10, 56(%rdi)
mov 32(%rsi), %r11
mov %r11, %r10
and $0x7fffffff, %r10
mov $0xaaaaaaaaaaaaaaa8, %r14
pdep %r14, %r10, %r10
xor %r10, 0(%rdi)
mov $0x7fffffff80000000, %r15
pext %r15, %r11, %r10
mov $0xaaaaaaaaaaaaaaaa, %r9
pdep %r9, %r10, %r10
xor %r10, 8(%rdi)
mov %r11, %r10
rol $2, %r10
and $0x2, %r10
xor %r10, 16(%rdi)
mov 40(%rsi), %r11
mov %r11, %r10
and $0x7fffffff, %r10
pdep %r14, %r10, %r10
xor %r10, 16(%rdi)
pext %r15, %r11, %r10
pdep %r9, %r10, %r10
xor %r10, 24(%rdi)
mov %r11, %r10
rol $2, %r10
and $0x2, %r10
xor %r10, 32(%rdi)
mov 48(%rsi), %r11
mov %r11, %r10
and $0x7fffffff, %r10
pdep %r14, %r10, %r10
xor %r10, 32(%rdi)
pext %r15, %r11, %r10
pdep %r9, %r10, %r10
xor %r10, 40(%rdi)
mov %r11, %r10
rol $2, %r10
and $0x2, %r10
xor %r10, 48(%rdi)
mov 56(%rsi), %r11
mov %r11, %r10
and $0x7fffffff, %r10
pdep %r14, %r10, %r10
xor %r10, 48(%rdi)
mov $0x1fffffff80000000, %r8
pext %r8, %r11, %r10
mov $0xaaaaaaaaaaaaaaa, %rdx
pdep %rdx, %r10, %r10
xor %r10, 56(%rdi)
pop %rbp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
ret

+ 0
- 3992
src/kem/ntru/ntruhps2048509/avx2/square_252_509_shufbytes.s
File diff suppressed because it is too large
View File


+ 0
- 4316
src/kem/ntru/ntruhps2048509/avx2/square_30_509_shufbytes.s
File diff suppressed because it is too large
View File


+ 0
- 272
src/kem/ntru/ntruhps2048509/avx2/square_3_509_patience.s View File

@@ -1,272 +0,0 @@
.data
.p2align 5
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_square_3_509
.global _PQCLEAN_NTRUHPS2048509_AVX2_square_3_509
PQCLEAN_NTRUHPS2048509_AVX2_square_3_509:
_PQCLEAN_NTRUHPS2048509_AVX2_square_3_509:
push %r15
push %r14
push %r13
push %r12
push %rbx
push %rbp
mov 0(%rsi), %r11
mov %r11, %r10
and $0xff, %r10
mov $0x101010101010101, %rbp
pdep %rbp, %r10, %r10
mov %r10, 0(%rdi)
mov $0xff00, %rbx
pext %rbx, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 8(%rdi)
mov $0xff0000, %r12
pext %r12, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 16(%rdi)
mov $0xff000000, %r13
pext %r13, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 24(%rdi)
mov $0xff00000000, %r14
pext %r14, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 32(%rdi)
mov $0xff0000000000, %r15
pext %r15, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 40(%rdi)
mov $0xff000000000000, %r9
pext %r9, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 48(%rdi)
mov $0xff00000000000000, %r8
pext %r8, %r11, %r10
pdep %rbp, %r10, %r10
mov %r10, 56(%rdi)
mov 8(%rsi), %r11
mov %r11, %r10
and $0xff, %r10
mov $0x808080808080808, %rdx
pdep %rdx, %r10, %r10
xor %r10, 0(%rdi)
pext %rbx, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 8(%rdi)
pext %r12, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 16(%rdi)
pext %r13, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 24(%rdi)
pext %r14, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 32(%rdi)
pext %r15, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 40(%rdi)
pext %r9, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 48(%rdi)
pext %r8, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 56(%rdi)
mov 16(%rsi), %r11
mov $0x80000000000000ff, %rcx
pext %rcx, %r11, %r10
mov $0x9010101010101010, %rax
pdep %rax, %r10, %r10
rol $2, %r10
xor %r10, 0(%rdi)
pext %rbx, %r11, %r10
mov $0x4040404040404040, %rbp
pdep %rbp, %r10, %r10
xor %r10, 8(%rdi)
pext %r12, %r11, %r10
pdep %rbp, %r10, %r10
xor %r10, 16(%rdi)
pext %r13, %r11, %r10
pdep %rbp, %r10, %r10
xor %r10, 24(%rdi)
pext %r14, %r11, %r10
pdep %rbp, %r10, %r10
xor %r10, 32(%rdi)
pext %r15, %r11, %r10
pdep %rbp, %r10, %r10
xor %r10, 40(%rdi)
pext %r9, %r11, %r10
pdep %rbp, %r10, %r10
xor %r10, 48(%rdi)
mov $0x7f00000000000000, %r8
pext %r8, %r11, %r10
mov $0x40404040404040, %rdx
pdep %rdx, %r10, %r10
xor %r10, 56(%rdi)
mov 24(%rsi), %r11
mov $0x800000000000007f, %rcx
pext %rcx, %r11, %r10
mov $0x8010101010101010, %rax
pdep %rax, %r10, %r10
rol $5, %r10
xor %r10, 0(%rdi)
mov $0x7f80, %rbx
pext %rbx, %r11, %r10
mov $0x202020202020202, %r12
pdep %r12, %r10, %r10
xor %r10, 8(%rdi)
mov $0x7f8000, %r13
pext %r13, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 16(%rdi)
mov $0x7f800000, %r14
pext %r14, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 24(%rdi)
mov $0x7f80000000, %r15
pext %r15, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 32(%rdi)
mov $0x7f8000000000, %r9
pext %r9, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 40(%rdi)
mov $0x7f800000000000, %rbp
pext %rbp, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 48(%rdi)
mov $0x7f80000000000000, %r8
pext %r8, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 56(%rdi)
mov 32(%rsi), %r11
pext %rcx, %r11, %r10
pdep %rax, %r10, %r10
rol $8, %r10
xor %r10, 0(%rdi)
pext %rbx, %r11, %r10
mov $0x1010101010101010, %rdx
pdep %rdx, %r10, %r10
xor %r10, 8(%rdi)
pext %r13, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 16(%rdi)
pext %r14, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 24(%rdi)
pext %r15, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 32(%rdi)
pext %r9, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 40(%rdi)
pext %rbp, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 48(%rdi)
pext %r8, %r11, %r10
pdep %rdx, %r10, %r10
xor %r10, 56(%rdi)
mov 40(%rsi), %r11
mov $0xc00000000000007f, %r12
pext %r12, %r11, %r10
mov $0x8090101010101010, %rcx
pdep %rcx, %r10, %r10
rol $11, %r10
xor %r10, 0(%rdi)
pext %rbx, %r11, %r10
mov $0x8080808080808080, %rax
pdep %rax, %r10, %r10
xor %r10, 8(%rdi)
pext %r13, %r11, %r10
pdep %rax, %r10, %r10
xor %r10, 16(%rdi)
pext %r14, %r11, %r10
pdep %rax, %r10, %r10
xor %r10, 24(%rdi)
pext %r15, %r11, %r10
pdep %rax, %r10, %r10
xor %r10, 32(%rdi)
pext %r9, %r11, %r10
pdep %rax, %r10, %r10
xor %r10, 40(%rdi)
pext %rbp, %r11, %r10
pdep %rax, %r10, %r10
xor %r10, 48(%rdi)
mov $0x3f80000000000000, %r8
pext %r8, %r11, %r10
mov $0x80808080808080, %rdx
pdep %rdx, %r10, %r10
xor %r10, 56(%rdi)
mov 48(%rsi), %r11
mov $0xc00000000000003f, %r12
pext %r12, %r11, %r10
mov $0x8080101010101010, %rcx
pdep %rcx, %r10, %r10
rol $14, %r10
xor %r10, 0(%rdi)
mov $0x3fc0, %rbx
pext %rbx, %r11, %r10
mov $0x404040404040404, %r13
pdep %r13, %r10, %r10
xor %r10, 8(%rdi)
mov $0x3fc000, %r14
pext %r14, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 16(%rdi)
mov $0x3fc00000, %r15
pext %r15, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 24(%rdi)
mov $0x3fc0000000, %r9
pext %r9, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 32(%rdi)
mov $0x3fc000000000, %rbp
pext %rbp, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 40(%rdi)
mov $0x3fc00000000000, %rax
pext %rax, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 48(%rdi)
mov $0x3fc0000000000000, %r8
pext %r8, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 56(%rdi)
mov 56(%rsi), %r11
mov %r11, %r10
and $0x3f, %r10
mov $0x2020202020200000, %rdx
pdep %rdx, %r10, %r10
xor %r10, 0(%rdi)
pext %rbx, %r11, %r10
mov $0x2020202020202020, %r12
pdep %r12, %r10, %r10
xor %r10, 8(%rdi)
pext %r14, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 16(%rdi)
pext %r15, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 24(%rdi)
pext %r9, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 32(%rdi)
pext %rbp, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 40(%rdi)
pext %rax, %r11, %r10
pdep %r12, %r10, %r10
xor %r10, 48(%rdi)
mov $0x1fc0000000000000, %rcx
pext %rcx, %r11, %r10
mov $0x20202020202020, %r8
pdep %r8, %r10, %r10
xor %r10, 56(%rdi)
pop %rbp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
ret

+ 0
- 4186
src/kem/ntru/ntruhps2048509/avx2/square_63_509_shufbytes.s
File diff suppressed because it is too large
View File


+ 0
- 296
src/kem/ntru/ntruhps2048509/avx2/square_6_509_patience.s View File

@@ -1,296 +0,0 @@
.data
.p2align 5
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_square_6_509
.global _PQCLEAN_NTRUHPS2048509_AVX2_square_6_509
PQCLEAN_NTRUHPS2048509_AVX2_square_6_509:
_PQCLEAN_NTRUHPS2048509_AVX2_square_6_509:
push %r15
push %r14
push %r13
push %r12
push %rbx
push %rbp
mov 0(%rsi), %r11
mov $0x101010101010101, %rbp
pext %rbp, %r11, %r10
mov $0x249249, %rbx
pdep %rbx, %r10, %r10
mov %r10, 0(%rdi)
mov $0x202020202020202, %r12
pext %r12, %r11, %r10
pdep %rbx, %r10, %r10
mov %r10, 8(%rdi)
mov $0x404040404040404, %r13
pext %r13, %r11, %r10
pdep %rbx, %r10, %r10
mov %r10, 16(%rdi)
mov $0x808080808080808, %r14
pext %r14, %r11, %r10
pdep %rbx, %r10, %r10
mov %r10, 24(%rdi)
mov $0x1010101010101010, %r15
pext %r15, %r11, %r10
pdep %rbx, %r10, %r10
mov %r10, 32(%rdi)
mov $0x2020202020202020, %r9
pext %r9, %r11, %r10
pdep %rbx, %r10, %r10
mov %r10, 40(%rdi)
mov $0x4040404040404040, %r8
pext %r8, %r11, %r10
pdep %rbx, %r10, %r10
mov %r10, 48(%rdi)
mov $0x8080808080808080, %rdx
pext %rdx, %r11, %r10
pdep %rbx, %r10, %r10
mov %r10, 56(%rdi)
mov 8(%rsi), %r11
pext %rbp, %r11, %r10
mov $0x249249000000, %rcx
pdep %rcx, %r10, %r10
xor %r10, 0(%rdi)
pext %r12, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 8(%rdi)
pext %r13, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 16(%rdi)
pext %r14, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 24(%rdi)
pext %r15, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 32(%rdi)
pext %r9, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 40(%rdi)
pext %r8, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 48(%rdi)
pext %rdx, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 56(%rdi)
mov 16(%rsi), %r11
mov $0x8080810101010101, %rax
pext %rax, %r11, %r10
mov $0x9249248000000000, %rbx
pdep %rbx, %r10, %r10
rol $9, %r10
xor %r10, 0(%rdi)
mov $0x101020202020202, %rbp
pext %rbp, %r11, %r10
mov $0x9249240000000000, %r12
pdep %r12, %r10, %r10
rol $6, %r10
xor %r10, 8(%rdi)
mov $0x202040404040404, %r13
pext %r13, %r11, %r10
pdep %r12, %r10, %r10
rol $6, %r10
xor %r10, 16(%rdi)
mov $0x404080808080808, %r14
pext %r14, %r11, %r10
pdep %r12, %r10, %r10
rol $6, %r10
xor %r10, 24(%rdi)
mov $0x808101010101010, %r15
pext %r15, %r11, %r10
pdep %r12, %r10, %r10
rol $6, %r10
xor %r10, 32(%rdi)
mov $0x1010202020202020, %r9
pext %r9, %r11, %r10
pdep %r12, %r10, %r10
rol $6, %r10
xor %r10, 40(%rdi)
mov $0x2020404040404040, %r8
pext %r8, %r11, %r10
pdep %r12, %r10, %r10
rol $6, %r10
xor %r10, 48(%rdi)
mov $0x4040008080808080, %rdx
pext %rdx, %r11, %r10
mov $0x9049240000000000, %rcx
pdep %rcx, %r10, %r10
rol $6, %r10
xor %r10, 56(%rdi)
mov 24(%rsi), %r11
mov $0x8080808080808080, %rax
pext %rax, %r11, %r10
mov $0x124924800, %rbx
pdep %rbx, %r10, %r10
xor %r10, 0(%rdi)
mov $0x101010101010101, %rbp
pext %rbp, %r11, %r10
mov $0x24924900, %r13
pdep %r13, %r10, %r10
xor %r10, 8(%rdi)
mov $0x202020202020202, %r14
pext %r14, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 16(%rdi)
mov $0x404040404040404, %r15
pext %r15, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 24(%rdi)
mov $0x808080808080808, %r9
pext %r9, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 32(%rdi)
mov $0x1010101010101010, %r8
pext %r8, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 40(%rdi)
mov $0x2020202020202020, %r12
pext %r12, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 48(%rdi)
mov $0x4040404040404040, %rdx
pext %rdx, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 56(%rdi)
mov 32(%rsi), %r11
pext %rax, %r11, %r10
mov $0x124924800000000, %rcx
pdep %rcx, %r10, %r10
xor %r10, 0(%rdi)
pext %rbp, %r11, %r10
mov $0x24924900000000, %rbx
pdep %rbx, %r10, %r10
xor %r10, 8(%rdi)
pext %r14, %r11, %r10
pdep %rbx, %r10, %r10
xor %r10, 16(%rdi)
pext %r15, %r11, %r10
pdep %rbx, %r10, %r10
xor %r10, 24(%rdi)
pext %r9, %r11, %r10
pdep %rbx, %r10, %r10
xor %r10, 32(%rdi)
pext %r8, %r11, %r10
pdep %rbx, %r10, %r10
xor %r10, 40(%rdi)
pext %r12, %r11, %r10
pdep %rbx, %r10, %r10
xor %r10, 48(%rdi)
pext %rdx, %r11, %r10
pdep %rbx, %r10, %r10
xor %r10, 56(%rdi)
mov 40(%rsi), %r11
mov $0x4040404040408080, %r13
pext %r13, %r11, %r10
mov $0x9249240000000000, %rax
pdep %rax, %r10, %r10
rol $17, %r10
xor %r10, 0(%rdi)
mov $0x8080808080810101, %rcx
pext %rcx, %r11, %r10
mov $0x9249248000000000, %rbp
pdep %rbp, %r10, %r10
rol $17, %r10
xor %r10, 8(%rdi)
mov $0x101010101020202, %r14
pext %r14, %r11, %r10
pdep %rax, %r10, %r10
rol $14, %r10
xor %r10, 16(%rdi)
mov $0x202020202040404, %r15
pext %r15, %r11, %r10
pdep %rax, %r10, %r10
rol $14, %r10
xor %r10, 24(%rdi)
mov $0x404040404080808, %r9
pext %r9, %r11, %r10
pdep %rax, %r10, %r10
rol $14, %r10
xor %r10, 32(%rdi)
mov $0x808080808101010, %r8
pext %r8, %r11, %r10
pdep %rax, %r10, %r10
rol $14, %r10
xor %r10, 40(%rdi)
mov $0x1010101010202020, %r12
pext %r12, %r11, %r10
pdep %rax, %r10, %r10
rol $14, %r10
xor %r10, 48(%rdi)
mov $0x2020202020004040, %rdx
pext %rdx, %r11, %r10
mov $0x9248240000000000, %rbx
pdep %rbx, %r10, %r10
rol $14, %r10
xor %r10, 56(%rdi)
mov 48(%rsi), %r11
mov $0x4040404040404040, %r13
pext %r13, %r11, %r10
mov $0x12492480000, %rcx
pdep %rcx, %r10, %r10
xor %r10, 0(%rdi)
mov $0x8080808080808080, %rbp
pext %rbp, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 8(%rdi)
mov $0x101010101010101, %r14
pext %r14, %r11, %r10
mov $0x2492490000, %r15
pdep %r15, %r10, %r10
xor %r10, 16(%rdi)
mov $0x202020202020202, %r9
pext %r9, %r11, %r10
pdep %r15, %r10, %r10
xor %r10, 24(%rdi)
mov $0x404040404040404, %r8
pext %r8, %r11, %r10
pdep %r15, %r10, %r10
xor %r10, 32(%rdi)
mov $0x808080808080808, %r12
pext %r12, %r11, %r10
pdep %r15, %r10, %r10
xor %r10, 40(%rdi)
mov $0x1010101010101010, %rax
pext %rax, %r11, %r10
pdep %r15, %r10, %r10
xor %r10, 48(%rdi)
mov $0x2020202020202020, %rdx
pext %rdx, %r11, %r10
pdep %r15, %r10, %r10
xor %r10, 56(%rdi)
mov 56(%rsi), %r11
mov $0x40404040404040, %rbx
pext %rbx, %r11, %r10
mov $0x2492480000000000, %r13
pdep %r13, %r10, %r10
xor %r10, 0(%rdi)
mov $0x80808080808080, %rbp
pext %rbp, %r11, %r10
pdep %r13, %r10, %r10
xor %r10, 8(%rdi)
pext %r14, %r11, %r10
mov $0x2492490000000000, %rcx
pdep %rcx, %r10, %r10
xor %r10, 16(%rdi)
pext %r9, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 24(%rdi)
pext %r8, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 32(%rdi)
pext %r12, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 40(%rdi)
pext %rax, %r11, %r10
pdep %rcx, %r10, %r10
xor %r10, 48(%rdi)
mov $0x20202020202020, %rdx
pext %rdx, %r11, %r10
mov $0x492490000000000, %r15
pdep %r15, %r10, %r10
xor %r10, 56(%rdi)
pop %rbp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
ret

+ 0
- 784
src/kem/ntru/ntruhps2048509/avx2/vec32_sample_iid.s View File

@@ -1,784 +0,0 @@
.data
.p2align 5
cast8_to_16:
.byte 255
.byte 0
.byte 255
.byte 1
.byte 255
.byte 2
.byte 255
.byte 3
.byte 255
.byte 4
.byte 255
.byte 5
.byte 255
.byte 6
.byte 255
.byte 7
.byte 255
.byte 0
.byte 255
.byte 1
.byte 255
.byte 2
.byte 255
.byte 3
.byte 255
.byte 4
.byte 255
.byte 5
.byte 255
.byte 6
.byte 255
.byte 7
mask_ff:
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
.word 0xff
mask_f:
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
.word 0xf
mask_3:
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.word 0x03
.text
.global PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid
.global _PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid
PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid:
_PQCLEAN_NTRUHPS2048509_AVX2_vec32_sample_iid:
vmovdqa 0(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 0(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 32(%rdi)
vmovdqa 32(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 64(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 96(%rdi)
vmovdqa 64(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 128(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 160(%rdi)
vmovdqa 96(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 192(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 224(%rdi)
vmovdqa 128(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 256(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 288(%rdi)
vmovdqa 160(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 320(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 352(%rdi)
vmovdqa 192(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 384(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 416(%rdi)
vmovdqa 224(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 448(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 480(%rdi)
vmovdqa 256(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 512(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 544(%rdi)
vmovdqa 288(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 576(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 608(%rdi)
vmovdqa 320(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 640(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 672(%rdi)
vmovdqa 352(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 704(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 736(%rdi)
vmovdqa 384(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 768(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 800(%rdi)
vmovdqa 416(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 832(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 864(%rdi)
vmovdqa 448(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 896(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 928(%rdi)
vmovdqa 480(%rsi), %ymm3
vextracti128 $0, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 960(%rdi)
vextracti128 $1, %ymm3, %xmm1
vpermq $216, %ymm1, %ymm1
vpshufb cast8_to_16(%rip), %ymm1, %ymm1
vpsrlw $8, %ymm1, %ymm2
vpand mask_ff(%rip), %ymm1, %ymm1
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_f(%rip), %ymm2, %ymm1
vpsrlw $4, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpand mask_3(%rip), %ymm2, %ymm1
vpsrlw $2, %ymm2, %ymm2
vpaddw %ymm2, %ymm1, %ymm2
vpsubw mask_3(%rip), %ymm2, %ymm14
vpsraw $15, %ymm14, %ymm15
vpandn %ymm14, %ymm15, %ymm1
vpand %ymm15, %ymm2, %ymm14
vpxor %ymm14, %ymm1, %ymm2
vmovdqa %ymm2, 992(%rdi)
movw $0, 1016(%rdi)
movw $0, 1018(%rdi)
movw $0, 1020(%rdi)
movw $0, 1022(%rdi)
ret

+ 0
- 20
src/kem/ntru/ntruhps2048509/clean/CMakeLists.txt View File

@@ -1,20 +0,0 @@
set(
SRC_CLEAN_NTRUHPS2048509
cmov.c
crypto_sort_int32.c
kem.c
owcpa.c
pack3.c
packq.c
poly.c
poly_lift.c
poly_mod.c
poly_r2_inv.c
poly_rq_mul.c
poly_s3_inv.c
sample.c
sample_iid.c
)

define_kem_alg(ntruhps2048509_clean
PQCLEAN_NTRUHPS2048509_CLEAN "${SRC_CLEAN_NTRUHPS2048509}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 19
src/kem/ntru/ntruhps2048509/clean/api.h View File

@@ -1,19 +0,0 @@
#ifndef PQCLEAN_NTRUHPS2048509_CLEAN_API_H
#define PQCLEAN_NTRUHPS2048509_CLEAN_API_H

#include <stdint.h>

#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_SECRETKEYBYTES 935
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_PUBLICKEYBYTES 699
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_CIPHERTEXTBYTES 699
#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_BYTES 32

#define PQCLEAN_NTRUHPS2048509_CLEAN_CRYPTO_ALGNAME "ntruhps2048509"

int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk);

int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk);

#endif

+ 0
- 11
src/kem/ntru/ntruhps2048509/clean/cmov.c View File

@@ -1,11 +0,0 @@
#include "cmov.h"

/* b = 1 means mov, b = 0 means don't mov*/
void PQCLEAN_NTRUHPS2048509_CLEAN_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) {
size_t i;

b = (~b + 1);
for (i = 0; i < len; i++) {
r[i] ^= b & (x[i] ^ r[i]);
}
}

+ 0
- 10
src/kem/ntru/ntruhps2048509/clean/cmov.h View File

@@ -1,10 +0,0 @@
#ifndef VERIFY_H
#define VERIFY_H

#include "params.h"

#include <stddef.h>

void PQCLEAN_NTRUHPS2048509_CLEAN_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b);

#endif

+ 0
- 86
src/kem/ntru/ntruhps2048509/clean/crypto_sort_int32.c View File

@@ -1,86 +0,0 @@
// Based on supercop-20190110/crypto_sort/int32/x86

#include "crypto_sort_int32.h"

#include <stdint.h>
#define int32 int32_t

#define int32_MINMAX(a,b) \
do { \
int32_t ab = (b) ^ (a); \
int32_t c = (int32_t)((int64_t)(b) - (int64_t)(a)); \
c ^= ab & (c ^ (b)); \
c >>= 31; \
c &= ab; \
(a) ^= c; \
(b) ^= c; \
} while(0)

/* assume 2 <= n <= 0x40000000 */
void PQCLEAN_NTRUHPS2048509_CLEAN_crypto_sort_int32(int32 *array, size_t n) {
size_t top, p, q, r, i, j;
int32 *x = array;

top = 1;
while (top < n - top) {
top += top;
}

for (p = top; p >= 1; p >>= 1) {
i = 0;
while (i + 2 * p <= n) {
for (j = i; j < i + p; ++j) {
int32_MINMAX(x[j], x[j + p]);
}
i += 2 * p;
}
for (j = i; j < n - p; ++j) {
int32_MINMAX(x[j], x[j + p]);
}

i = 0;
j = 0;
for (q = top; q > p; q >>= 1) {
if (j != i) {
for (;;) {
if (j == n - q) {
goto done;
}
int32 a = x[j + p];
for (r = q; r > p; r >>= 1) {
int32_MINMAX(a, x[j + r]);
}
x[j + p] = a;
++j;
if (j == i + p) {
i += 2 * p;
break;
}
}
}
while (i + p <= n - q) {
for (j = i; j < i + p; ++j) {
int32 a = x[j + p];
for (r = q; r > p; r >>= 1) {
int32_MINMAX(a, x[j + r]);
}
x[j + p] = a;
}
i += 2 * p;
}
/* now i + p > n - q */
j = i;
while (j < n - q) {
int32 a = x[j + p];
for (r = q; r > p; r >>= 1) {
int32_MINMAX(a, x[j + r]);
}
x[j + p] = a;
++j;
}

done:
;
}
}
}

+ 0
- 11
src/kem/ntru/ntruhps2048509/clean/crypto_sort_int32.h View File

@@ -1,11 +0,0 @@
#ifndef CRYPTO_SORT
#define CRYPTO_SORT

#include "params.h"

#include <stddef.h>
#include <stdint.h>

void PQCLEAN_NTRUHPS2048509_CLEAN_crypto_sort_int32(int32_t *array, size_t n);

#endif

+ 0
- 63
src/kem/ntru/ntruhps2048509/clean/kem.c View File

@@ -1,63 +0,0 @@
#include "api.h"
#include "cmov.h"
#include "fips202.h"
#include "owcpa.h"
#include "params.h"
#include "randombytes.h"
#include "sample.h"

// API FUNCTIONS
int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
uint8_t seed[NTRU_SAMPLE_FG_BYTES];

randombytes(seed, NTRU_SAMPLE_FG_BYTES);
PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(pk, sk, seed);

randombytes(sk + NTRU_OWCPA_SECRETKEYBYTES, NTRU_PRFKEYBYTES);

return 0;
}

int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) {
poly r, m;
uint8_t rm[NTRU_OWCPA_MSGBYTES];
uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES];

randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES);

PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(&r, &m, rm_seed);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm, &r);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m);
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(&r);
PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(c, &r, &m, pk);

return 0;
}

int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk) {
int i, fail;
uint8_t rm[NTRU_OWCPA_MSGBYTES];
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];

fail = PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(rm, c, sk);
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
/* See comment in PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec for details. */

sha3_256(k, rm, NTRU_OWCPA_MSGBYTES);

/* shake(secret PRF key || input ciphertext) */
for (i = 0; i < NTRU_PRFKEYBYTES; i++) {
buf[i] = sk[i + NTRU_OWCPA_SECRETKEYBYTES];
}
for (i = 0; i < NTRU_CIPHERTEXTBYTES; i++) {
buf[NTRU_PRFKEYBYTES + i] = c[i];
}
sha3_256(rm, buf, NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES);

PQCLEAN_NTRUHPS2048509_CLEAN_cmov(k, rm, NTRU_SHAREDKEYBYTES, (unsigned char) fail);

return 0;
}

+ 0
- 183
src/kem/ntru/ntruhps2048509/clean/owcpa.c View File

@@ -1,183 +0,0 @@
#include "owcpa.h"
#include "poly.h"
#include "sample.h"

static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
/* Check that any unused bits of the final byte are zero. */

uint16_t t = 0;

t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));

/* We have 0 <= t < 256 */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 15));
}

static int owcpa_check_r(const poly *r) {
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */

int i;
uint32_t t = 0;
uint16_t c;
for (i = 0; i < NTRU_N - 1; i++) {
c = r->coeffs[i];
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
}
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */

/* We have 0 <= t < 2^16. */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 31));
}

static int owcpa_check_m(const poly *m) {
/* Check that m is in message space, i.e. */
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
/* Note: We may assume that m has coefficients in {0,1,2}. */

int i;
uint32_t t = 0;
uint16_t ps = 0;
uint16_t ms = 0;
for (i = 0; i < NTRU_N; i++) {
ps += m->coeffs[i] & 1;
ms += m->coeffs[i] & 2;
}
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */

/* We have 0 <= t < 2^16. */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 31));
}

void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk,
unsigned char *sk,
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) {
int i;

poly x1, x2, x3, x4, x5;

poly *f = &x1, *g = &x2, *invf_mod3 = &x3;
poly *gf = &x3, *invgf = &x4, *tmp = &x5;
poly *invh = &x3, *h = &x3;

PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(f, g, seed);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(invf_mod3, f);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(sk, f);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(sk + NTRU_PACK_TRINARY_BYTES, invf_mod3);

/* Lift coeffs of f and g from Z_p to Z_q */
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(f);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(g);


/* g = 3*g */
for (i = 0; i < NTRU_N; i++) {
g->coeffs[i] = 3 * g->coeffs[i];
}

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(gf, g, f);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(invgf, gf);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invgf, f);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(invh, tmp, f);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(tmp, invgf, g);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(h, tmp, g);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(pk, h);
}


void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(unsigned char *c,
const poly *r,
const poly *m,
const unsigned char *pk) {
int i;
poly x1, x2;
poly *h = &x1, *liftm = &x1;
poly *ct = &x2;

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(h, pk);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(ct, r, h);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(liftm, m);
for (i = 0; i < NTRU_N; i++) {
ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i];
}

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(c, ct);
}

int PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(unsigned char *rm,
const unsigned char *ciphertext,
const unsigned char *secretkey) {
int i;
int fail;
poly x1, x2, x3, x4;

poly *c = &x1, *f = &x2, *cf = &x3;
poly *mf = &x2, *finv3 = &x3, *m = &x4;
poly *liftm = &x2, *invh = &x3, *r = &x4;
poly *b = &x1;

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(c, ciphertext);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(f, secretkey);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(f);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(cf, c, f);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(mf, cf);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(finv3, secretkey + NTRU_PACK_TRINARY_BYTES);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(m, mf, finv3);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);

fail = 0;

/* Check that the unused bits of the last byte of the ciphertext are zero */
fail |= owcpa_check_ciphertext(ciphertext);

/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
/* (m can take any value in S3 in NTRU_HRSS) */
fail |= owcpa_check_m(m);

/* b = c - Lift(m) mod (q, x^n - 1) */
PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(liftm, m);
for (i = 0; i < NTRU_N; i++) {
b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i];
}

/* r = b / h mod (q, Phi_n) */
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(invh, secretkey + 2 * NTRU_PACK_TRINARY_BYTES);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(r, b, invh);

/* NOTE: Our definition of r as b/h mod (q, Phi_n) follows Figure 4 of */
/* [Sch18] https://eprint.iacr.org/2018/1174/20181203:032458. */
/* This differs from Figure 10 of Saito--Xagawa--Yamakawa */
/* [SXY17] https://eprint.iacr.org/2017/1005/20180516:055500 */
/* where r gets a final reduction modulo p. */
/* We need this change to use Proposition 1 of [Sch18]. */

/* Proposition 1 of [Sch18] shows that re-encryption with (r,m) yields c. */
/* if and only if fail==0 after the following call to owcpa_check_r */
/* The procedure given in Fig. 8 of [Sch18] can be skipped because we have */
/* c(1) = 0 due to the use of poly_Rq_sum_zero_{to,from}bytes. */
fail |= owcpa_check_r(r);

PQCLEAN_NTRUHPS2048509_CLEAN_poly_trinary_Zq_to_Z3(r);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm, r);

return fail;
}

+ 0
- 19
src/kem/ntru/ntruhps2048509/clean/owcpa.h View File

@@ -1,19 +0,0 @@
#ifndef OWCPA_H
#define OWCPA_H

#include "params.h"
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk,
unsigned char *sk,
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]);

void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_enc(unsigned char *c,
const poly *r,
const poly *m,
const unsigned char *pk);

int PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(unsigned char *rm,
const unsigned char *ciphertext,
const unsigned char *secretkey);
#endif

+ 0
- 46
src/kem/ntru/ntruhps2048509/clean/pack3.c View File

@@ -1,46 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_MSGBYTES], const poly *a) {
int i;
unsigned char c;
int j;

for (i = 0; i < NTRU_PACK_DEG / 5; i++) {
c = a->coeffs[5 * i + 4] & 255;
c = (3 * c + a->coeffs[5 * i + 3]) & 255;
c = (3 * c + a->coeffs[5 * i + 2]) & 255;
c = (3 * c + a->coeffs[5 * i + 1]) & 255;
c = (3 * c + a->coeffs[5 * i + 0]) & 255;
msg[i] = c;
}
i = NTRU_PACK_DEG / 5;
c = 0;
for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) {
c = (3 * c + a->coeffs[5 * i + j]) & 255;
}
msg[i] = c;
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_OWCPA_MSGBYTES]) {
int i;
unsigned char c;
int j;

for (i = 0; i < NTRU_PACK_DEG / 5; i++) {
c = msg[i];
r->coeffs[5 * i + 0] = c;
r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3
r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2
r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3
r->coeffs[5 * i + 4] = c * 203 >> 14; // etc.
}
i = NTRU_PACK_DEG / 5;
c = msg[i];
for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) {
r->coeffs[5 * i + j] = c;
c = c * 171 >> 9;
}
r->coeffs[NTRU_N - 1] = 0;
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r);
}


+ 0
- 93
src/kem/ntru/ntruhps2048509/clean/packq.c View File

@@ -1,93 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a) {
int i, j;
uint16_t t[8];

for (i = 0; i < NTRU_PACK_DEG / 8; i++) {
for (j = 0; j < 8; j++) {
t[j] = MODQ(a->coeffs[8 * i + j]);
}

r[11 * i + 0] = (unsigned char) ( t[0] & 0xff);
r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3));
r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6));
r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff);
r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1));
r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4));
r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7));
r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff);
r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2));
r[11 * i + 9] = (unsigned char) ((t[6] >> 6) | ((t[7] & 0x07) << 5));
r[11 * i + 10] = (unsigned char) ((t[7] >> 3));
}

for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) {
t[j] = MODQ(a->coeffs[8 * i + j]);
}
for (; j < 8; j++) {
t[j] = 0;
}

switch (NTRU_PACK_DEG & 0x07) {
// cases 0 and 6 are impossible since 2 generates (Z/n)* and
// p mod 8 in {1, 7} implies that 2 is a quadratic residue.
case 4:
r[11 * i + 0] = (unsigned char) (t[0] & 0xff);
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3);
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6);
r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff;
r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1);
r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4);
break;
case 2:
r[11 * i + 0] = (unsigned char) (t[0] & 0xff);
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3);
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6);
break;
}
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a) {
int i;
for (i = 0; i < NTRU_PACK_DEG / 8; i++) {
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10);
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7);
r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4);
r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9);
r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6);
r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3);
}
switch (NTRU_PACK_DEG & 0x07) {
// cases 0 and 6 are impossible since 2 generates (Z/n)* and
// p mod 8 in {1, 7} implies that 2 is a quadratic residue.
case 4:
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10);
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7);
break;
case 2:
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
break;
}
r->coeffs[NTRU_N - 1] = 0;
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) {
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(r, a);
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a) {
int i;
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(r, a);

/* Set r[n-1] so that the sum of coefficients is zero mod q */
r->coeffs[NTRU_N - 1] = 0;
for (i = 0; i < NTRU_PACK_DEG; i++) {
r->coeffs[NTRU_N - 1] -= r->coeffs[i];
}
}

+ 0
- 37
src/kem/ntru/ntruhps2048509/clean/params.h View File

@@ -1,37 +0,0 @@
#ifndef PARAMS_H
#define PARAMS_H

#define NTRU_HPS
#define NTRU_N 509
#define NTRU_LOGQ 11


/* Do not modify below this line */

#define PAD32(X) ((((X) + 31)/32)*32)

#define NTRU_Q (1 << NTRU_LOGQ)
#define NTRU_WEIGHT (NTRU_Q/8 - 2)

#define NTRU_SEEDBYTES 32
#define NTRU_PRFKEYBYTES 32
#define NTRU_SHAREDKEYBYTES 32

#define NTRU_SAMPLE_IID_BYTES (NTRU_N-1)
#define NTRU_SAMPLE_FT_BYTES ((30*(NTRU_N-1)+7)/8)
#define NTRU_SAMPLE_FG_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES)
#define NTRU_SAMPLE_RM_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES)

#define NTRU_PACK_DEG (NTRU_N-1)
#define NTRU_PACK_TRINARY_BYTES ((NTRU_PACK_DEG+4)/5)

#define NTRU_OWCPA_MSGBYTES (2*NTRU_PACK_TRINARY_BYTES)
#define NTRU_OWCPA_PUBLICKEYBYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8)
#define NTRU_OWCPA_SECRETKEYBYTES (2*NTRU_PACK_TRINARY_BYTES + NTRU_OWCPA_PUBLICKEYBYTES)
#define NTRU_OWCPA_BYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8)

#define NTRU_PUBLICKEYBYTES (NTRU_OWCPA_PUBLICKEYBYTES)
#define NTRU_SECRETKEYBYTES (NTRU_OWCPA_SECRETKEYBYTES + NTRU_PRFKEYBYTES)
#define NTRU_CIPHERTEXTBYTES (NTRU_OWCPA_BYTES)

#endif

+ 0
- 75
src/kem/ntru/ntruhps2048509/clean/poly.c View File

@@ -1,75 +0,0 @@
#include "poly.h"

/* Map {0, 1, 2} -> {0,1,q-1} in place */
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = r->coeffs[i] | ((-(r->coeffs[i] >> 1)) & (NTRU_Q - 1));
}
}

/* Map {0, 1, q-1} -> {0,1,2} in place */
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_trinary_Zq_to_Z3(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = MODQ(r->coeffs[i]);
r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1)));
}
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b) {
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, a, b);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(r);
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b) {
int i;

/* Our S3 multiplications do not overflow mod q, */
/* so we can re-purpose PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul, as long as we */
/* follow with an explicit reduction mod q. */
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, a, b);
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = MODQ(r->coeffs[i]);
}
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r);
}

static void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) {

int i;
poly b, c;
poly s;

// for 0..4
// ai = ai * (2 - a*ai) mod q
for (i = 0; i < NTRU_N; i++) {
b.coeffs[i] = -(a->coeffs[i]);
}

for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = ai->coeffs[i];
}

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, r, &b);
c.coeffs[0] += 2; // c = 2 - a*ai
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&s, &c, r); // s = ai*c

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, &s, &b);
c.coeffs[0] += 2; // c = 2 - a*s
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, &c, &s); // r = s*c

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, r, &b);
c.coeffs[0] += 2; // c = 2 - a*r
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&s, &c, r); // s = r*c

PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(&c, &s, &b);
c.coeffs[0] += 2; // c = 2 - a*s
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(r, &c, &s); // r = s*c
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(poly *r, const poly *a) {
poly ai2;
PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(&ai2, a);
PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv_to_Rq_inv(r, &ai2, a);
}

+ 0
- 39
src/kem/ntru/ntruhps2048509/clean/poly.h View File

@@ -1,39 +0,0 @@
#ifndef POLY_H
#define POLY_H

#include "params.h"

#include <stddef.h>
#include <stdint.h>

#define MODQ(X) ((X) & (NTRU_Q-1))

typedef struct {
uint16_t coeffs[NTRU_N];
} poly;

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(poly *r);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(poly *r);

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_tobytes(unsigned char *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_frombytes(poly *r, const unsigned char *a);

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a);

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(unsigned char msg[NTRU_PACK_TRINARY_BYTES], const poly *a);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_PACK_TRINARY_BYTES]);

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Sq_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a);

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_inv(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a);

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(poly *r);
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_trinary_Zq_to_Z3(poly *r);
#endif

+ 0
- 11
src/kem/ntru/ntruhps2048509/clean/poly_lift.c View File

@@ -1,11 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_lift(poly *r, const poly *a) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = a->coeffs[i];
}
PQCLEAN_NTRUHPS2048509_CLEAN_poly_Z3_to_Zq(r);
}



+ 0
- 53
src/kem/ntru/ntruhps2048509/clean/poly_mod.c View File

@@ -1,53 +0,0 @@
#include "poly.h"

static uint16_t mod3(uint16_t a) {
uint16_t r;
int16_t t, c;

r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255
r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3

t = r - 3;
c = t >> 15;

return (c & r) ^ (~c & t);
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = mod3(r->coeffs[i] + 2 * r->coeffs[NTRU_N - 1]);
}
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = r->coeffs[i] - r->coeffs[NTRU_N - 1];
}
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) {
int i;
uint16_t flag;

/* The coefficients of a are stored as non-negative integers. */
/* We must translate to representatives in [-q/2, q/2) before */
/* reduction mod 3. */
for (i = 0; i < NTRU_N; i++) {
/* Need an explicit reduction mod q here */
r->coeffs[i] = MODQ(a->coeffs[i]);

/* flag = 1 if r[i] >= q/2 else 0 */
flag = r->coeffs[i] >> (NTRU_LOGQ - 1);

/* Now we will add (-q) mod 3 if r[i] >= q/2 */
/* Note (-q) mod 3=(-2^k) mod 3=1<<(1-(k&1)) */
r->coeffs[i] += flag << (1 - (NTRU_LOGQ & 1));
}

PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r);
}


+ 0
- 69
src/kem/ntru/ntruhps2048509/clean/poly_r2_inv.c View File

@@ -1,69 +0,0 @@
/* Based on supercop-20200702/crypto_core/invhrss701/simpler/core.c */

#include "poly.h"

/* return -1 if x<0 and y<0; otherwise return 0 */
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
return (x & y) >> 15;
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a) {
poly f, g, v, w;
size_t i, loop;
int16_t delta, sign, swap, t;

for (i = 0; i < NTRU_N; ++i) {
v.coeffs[i] = 0;
}
for (i = 0; i < NTRU_N; ++i) {
w.coeffs[i] = 0;
}
w.coeffs[0] = 1;

for (i = 0; i < NTRU_N; ++i) {
f.coeffs[i] = 1;
}
for (i = 0; i < NTRU_N - 1; ++i) {
g.coeffs[NTRU_N - 2 - i] = (a->coeffs[i] ^ a->coeffs[NTRU_N - 1]) & 1;
}
g.coeffs[NTRU_N - 1] = 0;

delta = 1;

for (loop = 0; loop < 2 * (NTRU_N - 1) - 1; ++loop) {
for (i = NTRU_N - 1; i > 0; --i) {
v.coeffs[i] = v.coeffs[i - 1];
}
v.coeffs[0] = 0;

sign = g.coeffs[0] & f.coeffs[0];
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
delta ^= swap & (delta ^ -delta);
delta += 1;

for (i = 0; i < NTRU_N; ++i) {
t = swap & (f.coeffs[i] ^ g.coeffs[i]);
f.coeffs[i] ^= t;
g.coeffs[i] ^= t;
t = swap & (v.coeffs[i] ^ w.coeffs[i]);
v.coeffs[i] ^= t;
w.coeffs[i] ^= t;
}

for (i = 0; i < NTRU_N; ++i) {
g.coeffs[i] = g.coeffs[i] ^ (sign & f.coeffs[i]);
}
for (i = 0; i < NTRU_N; ++i) {
w.coeffs[i] = w.coeffs[i] ^ (sign & v.coeffs[i]);
}
for (i = 0; i < NTRU_N - 1; ++i) {
g.coeffs[i] = g.coeffs[i + 1];
}
g.coeffs[NTRU_N - 1] = 0;
}

for (i = 0; i < NTRU_N - 1; ++i) {
r->coeffs[i] = v.coeffs[NTRU_N - 2 - i];
}
r->coeffs[NTRU_N - 1] = 0;
}

+ 0
- 284
src/kem/ntru/ntruhps2048509/clean/poly_rq_mul.c View File

@@ -1,284 +0,0 @@
#include "poly.h"

/* Polynomial multiplication using */
/* Toom-4 and two layers of Karatsuba. */

#define L PAD32(NTRU_N)
#define M (L/4)
#define K (L/16)

static void toom4_k2x2_mul(uint16_t ab[2 * L], const uint16_t a[L], const uint16_t b[L]);

static void toom4_k2x2_eval_0(uint16_t r[9 * K], const uint16_t a[M]);
static void toom4_k2x2_eval_p1(uint16_t r[9 * K], const uint16_t a[M]);
static void toom4_k2x2_eval_m1(uint16_t r[9 * K], const uint16_t a[M]);
static void toom4_k2x2_eval_p2(uint16_t r[9 * K], const uint16_t a[M]);
static void toom4_k2x2_eval_m2(uint16_t r[9 * K], const uint16_t a[M]);
static void toom4_k2x2_eval_p3(uint16_t r[9 * K], const uint16_t a[M]);
static void toom4_k2x2_eval_inf(uint16_t r[9 * K], const uint16_t a[M]);
static inline void k2x2_eval(uint16_t r[9 * K]);

static void toom4_k2x2_basemul(uint16_t r[18 * K], const uint16_t a[9 * K], const uint16_t b[9 * K]);
static inline void schoolbook_KxK(uint16_t r[2 * K], const uint16_t a[K], const uint16_t b[K]);

static void toom4_k2x2_interpolate(uint16_t r[2 * M], const uint16_t a[63 * 2 * K]);
static inline void k2x2_interpolate(uint16_t r[M], const uint16_t a[9 * K]);

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_mul(poly *r, const poly *a, const poly *b) {
size_t i;
uint16_t ab[2 * L];

for (i = 0; i < NTRU_N; i++) {
ab[i] = a->coeffs[i];
ab[L + i] = b->coeffs[i];
}
for (i = NTRU_N; i < L; i++) {
ab[i] = 0;
ab[L + i] = 0;
}

toom4_k2x2_mul(ab, ab, ab + L);

for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = ab[i] + ab[NTRU_N + i];
}
}

static void toom4_k2x2_mul(uint16_t ab[2 * L], const uint16_t a[L], const uint16_t b[L]) {
uint16_t tmpA[9 * K];
uint16_t tmpB[9 * K];
uint16_t eC[63 * 2 * K];

toom4_k2x2_eval_0(tmpA, a);
toom4_k2x2_eval_0(tmpB, b);
toom4_k2x2_basemul(eC + 0 * 9 * 2 * K, tmpA, tmpB);

toom4_k2x2_eval_p1(tmpA, a);
toom4_k2x2_eval_p1(tmpB, b);
toom4_k2x2_basemul(eC + 1 * 9 * 2 * K, tmpA, tmpB);

toom4_k2x2_eval_m1(tmpA, a);
toom4_k2x2_eval_m1(tmpB, b);
toom4_k2x2_basemul(eC + 2 * 9 * 2 * K, tmpA, tmpB);

toom4_k2x2_eval_p2(tmpA, a);
toom4_k2x2_eval_p2(tmpB, b);
toom4_k2x2_basemul(eC + 3 * 9 * 2 * K, tmpA, tmpB);

toom4_k2x2_eval_m2(tmpA, a);
toom4_k2x2_eval_m2(tmpB, b);
toom4_k2x2_basemul(eC + 4 * 9 * 2 * K, tmpA, tmpB);

toom4_k2x2_eval_p3(tmpA, a);
toom4_k2x2_eval_p3(tmpB, b);
toom4_k2x2_basemul(eC + 5 * 9 * 2 * K, tmpA, tmpB);

toom4_k2x2_eval_inf(tmpA, a);
toom4_k2x2_eval_inf(tmpB, b);
toom4_k2x2_basemul(eC + 6 * 9 * 2 * K, tmpA, tmpB);

toom4_k2x2_interpolate(ab, eC);
}


static void toom4_k2x2_eval_0(uint16_t r[9 * K], const uint16_t a[M]) {
for (size_t i = 0; i < M; i++) {
r[i] = a[i];
}
k2x2_eval(r);
}

static void toom4_k2x2_eval_p1(uint16_t r[9 * K], const uint16_t a[M]) {
for (size_t i = 0; i < M; i++) {
r[i] = a[0 * M + i];
r[i] += a[1 * M + i];
r[i] += a[2 * M + i];
r[i] += a[3 * M + i];
}
k2x2_eval(r);
}

static void toom4_k2x2_eval_m1(uint16_t r[9 * K], const uint16_t a[M]) {
for (size_t i = 0; i < M; i++) {
r[i] = a[0 * M + i];
r[i] -= a[1 * M + i];
r[i] += a[2 * M + i];
r[i] -= a[3 * M + i];
}
k2x2_eval(r);
}

static void toom4_k2x2_eval_p2(uint16_t r[9 * K], const uint16_t a[M]) {
for (size_t i = 0; i < M; i++) {
r[i] = a[0 * M + i];
r[i] += 2 * a[1 * M + i];
r[i] += 4 * a[2 * M + i];
r[i] += 8 * a[3 * M + i];
}
k2x2_eval(r);
}

static void toom4_k2x2_eval_m2(uint16_t r[9 * K], const uint16_t a[M]) {
for (size_t i = 0; i < M; i++) {
r[i] = a[0 * M + i];
r[i] -= 2 * a[1 * M + i];
r[i] += 4 * a[2 * M + i];
r[i] -= 8 * a[3 * M + i];
}
k2x2_eval(r);
}

static void toom4_k2x2_eval_p3(uint16_t r[9 * K], const uint16_t a[M]) {
for (size_t i = 0; i < M; i++) {
r[i] = a[0 * M + i];
r[i] += 3 * a[1 * M + i];
r[i] += 9 * a[2 * M + i];
r[i] += 27 * a[3 * M + i];
}
k2x2_eval(r);
}

static void toom4_k2x2_eval_inf(uint16_t r[9 * K], const uint16_t a[M]) {
for (size_t i = 0; i < M; i++) {
r[i] = a[3 * M + i];
}
k2x2_eval(r);
}

static inline void k2x2_eval(uint16_t r[9 * K]) {
/* Input: e + f.Y + g.Y^2 + h.Y^3 */
/* Output: [ e | f | g | h | e+f | f+h | g+e | h+g | e+f+g+h ] */

size_t i;
for (i = 0; i < 4 * K; i++) {
r[4 * K + i] = r[i];
}
for (i = 0; i < K; i++) {
r[4 * K + i] += r[1 * K + i];
r[5 * K + i] += r[3 * K + i];
r[6 * K + i] += r[0 * K + i];
r[7 * K + i] += r[2 * K + i];
r[8 * K + i] = r[5 * K + i];
r[8 * K + i] += r[6 * K + i];
}
}

static void toom4_k2x2_basemul(uint16_t r[18 * K], const uint16_t a[9 * K], const uint16_t b[9 * K]) {
schoolbook_KxK(r + 0 * 2 * K, a + 0 * K, b + 0 * K);
schoolbook_KxK(r + 1 * 2 * K, a + 1 * K, b + 1 * K);
schoolbook_KxK(r + 2 * 2 * K, a + 2 * K, b + 2 * K);
schoolbook_KxK(r + 3 * 2 * K, a + 3 * K, b + 3 * K);
schoolbook_KxK(r + 4 * 2 * K, a + 4 * K, b + 4 * K);
schoolbook_KxK(r + 5 * 2 * K, a + 5 * K, b + 5 * K);
schoolbook_KxK(r + 6 * 2 * K, a + 6 * K, b + 6 * K);
schoolbook_KxK(r + 7 * 2 * K, a + 7 * K, b + 7 * K);
schoolbook_KxK(r + 8 * 2 * K, a + 8 * K, b + 8 * K);
}

static inline void schoolbook_KxK(uint16_t r[2 * K], const uint16_t a[K], const uint16_t b[K]) {
size_t i, j;
for (j = 0; j < K; j++) {
r[j] = a[0] * (uint32_t)b[j];
}
for (i = 1; i < K; i++) {
for (j = 0; j < K - 1; j++) {
r[i + j] += a[i] * (uint32_t)b[j];
}
r[i + K - 1] = a[i] * (uint32_t)b[K - 1];
}
r[2 * K - 1] = 0;
}

static void toom4_k2x2_interpolate(uint16_t r[2 * M], const uint16_t a[7 * 18 * K]) {
size_t i;

uint16_t P1[2 * M];
uint16_t Pm1[2 * M];
uint16_t P2[2 * M];
uint16_t Pm2[2 * M];

uint16_t *C0 = r;
uint16_t *C2 = r + 2 * M;
uint16_t *C4 = r + 4 * M;
uint16_t *C6 = r + 6 * M;

uint16_t V0, V1, V2;

k2x2_interpolate(C0, a + 0 * 9 * 2 * K);
k2x2_interpolate(P1, a + 1 * 9 * 2 * K);
k2x2_interpolate(Pm1, a + 2 * 9 * 2 * K);
k2x2_interpolate(P2, a + 3 * 9 * 2 * K);
k2x2_interpolate(Pm2, a + 4 * 9 * 2 * K);
k2x2_interpolate(C6, a + 6 * 9 * 2 * K);

for (i = 0; i < 2 * M; i++) {
V0 = ((uint32_t)(P1[i] + Pm1[i])) >> 1;
V0 = V0 - C0[i] - C6[i];
V1 = ((uint32_t)(P2[i] + Pm2[i] - 2 * C0[i] - 128 * C6[i])) >> 3;
C4[i] = 43691 * (uint32_t)(V1 - V0);
C2[i] = V0 - C4[i];
P1[i] = ((uint32_t)(P1[i] - Pm1[i])) >> 1;
}

/* reuse Pm1 for P3 */
#define P3 Pm1
k2x2_interpolate(P3, a + 5 * 9 * 2 * K);

for (i = 0; i < 2 * M; i++) {
V0 = P1[i];
V1 = 43691 * (((uint32_t)(P2[i] - Pm2[i]) >> 2) - V0);
V2 = 43691 * (uint32_t)(P3[i] - C0[i] - 9 * (C2[i] + 9 * (C4[i] + 9 * C6[i])));
V2 = ((uint32_t)(V2 - V0)) >> 3;
V2 -= V1;
P3[i] = 52429 * (uint32_t)V2;
P2[i] = V1 - V2;
P1[i] = V0 - P2[i] - P3[i];
}

for (i = 0; i < 2 * M; i++) {
r[1 * M + i] += P1[i];
r[3 * M + i] += P2[i];
r[5 * M + i] += P3[i];
}
}

static inline void k2x2_interpolate(uint16_t r[M], const uint16_t a[9 * K]) {
size_t i;
uint16_t tmp[4 * K];

for (i = 0; i < 2 * K; i++) {
r[0 * K + i] = a[0 * K + i];
r[2 * K + i] = a[2 * K + i];
}

for (i = 0; i < 2 * K; i++) {
r[1 * K + i] += a[8 * K + i] - a[0 * K + i] - a[2 * K + i];
}

for (i = 0; i < 2 * K; i++) {
r[4 * K + i] = a[4 * K + i];
r[6 * K + i] = a[6 * K + i];
}

for (i = 0; i < 2 * K; i++) {
r[5 * K + i] += a[14 * K + i] - a[4 * K + i] - a[6 * K + i];
}

for (i = 0; i < 2 * K; i++) {
tmp[0 * K + i] = a[12 * K + i];
tmp[2 * K + i] = a[10 * K + i];
}

for (i = 0; i < 2 * K; i++) {
tmp[K + i] += a[16 * K + i] - a[12 * K + i] - a[10 * K + i];
}

for (i = 0; i < 4 * K; i++) {
tmp[0 * K + i] = tmp[0 * K + i] - r[0 * K + i] - r[4 * K + i];
}

for (i = 0; i < 4 * K; i++) {
r[2 * K + i] += tmp[0 * K + i];
}
}


+ 0
- 78
src/kem/ntru/ntruhps2048509/clean/poly_s3_inv.c View File

@@ -1,78 +0,0 @@
/* Based on supercop-20200702/crypto_core/invhrss701/simpler/core.c */

#include "poly.h"

static inline uint8_t mod3(uint8_t a) { /* a between 0 and 9 */
int16_t t, c;
a = (a >> 2) + (a & 3); /* between 0 and 4 */
t = a - 3;
c = t >> 5;
return (uint8_t) (t ^ (c & (a ^ t)));
}

/* return -1 if x<0 and y<0; otherwise return 0 */
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
return (x & y) >> 15;
}

void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a) {
poly f, g, v, w;
size_t i, loop;
int16_t delta, sign, swap, t;

for (i = 0; i < NTRU_N; ++i) {
v.coeffs[i] = 0;
}
for (i = 0; i < NTRU_N; ++i) {
w.coeffs[i] = 0;
}
w.coeffs[0] = 1;

for (i = 0; i < NTRU_N; ++i) {
f.coeffs[i] = 1;
}
for (i = 0; i < NTRU_N - 1; ++i) {
g.coeffs[NTRU_N - 2 - i] = mod3((a->coeffs[i] & 3) + 2 * (a->coeffs[NTRU_N - 1] & 3));
}
g.coeffs[NTRU_N - 1] = 0;

delta = 1;

for (loop = 0; loop < 2 * (NTRU_N - 1) - 1; ++loop) {
for (i = NTRU_N - 1; i > 0; --i) {
v.coeffs[i] = v.coeffs[i - 1];
}
v.coeffs[0] = 0;

sign = mod3((uint8_t) (2 * g.coeffs[0] * f.coeffs[0]));
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
delta ^= swap & (delta ^ -delta);
delta += 1;

for (i = 0; i < NTRU_N; ++i) {
t = swap & (f.coeffs[i] ^ g.coeffs[i]);
f.coeffs[i] ^= t;
g.coeffs[i] ^= t;
t = swap & (v.coeffs[i] ^ w.coeffs[i]);
v.coeffs[i] ^= t;
w.coeffs[i] ^= t;
}

for (i = 0; i < NTRU_N; ++i) {
g.coeffs[i] = mod3((uint8_t) (g.coeffs[i] + sign * f.coeffs[i]));
}
for (i = 0; i < NTRU_N; ++i) {
w.coeffs[i] = mod3((uint8_t) (w.coeffs[i] + sign * v.coeffs[i]));
}
for (i = 0; i < NTRU_N - 1; ++i) {
g.coeffs[i] = g.coeffs[i + 1];
}
g.coeffs[NTRU_N - 1] = 0;
}

sign = f.coeffs[0];
for (i = 0; i < NTRU_N - 1; ++i) {
r->coeffs[i] = mod3((uint8_t) (sign * v.coeffs[NTRU_N - 2 - i]));
}
r->coeffs[NTRU_N - 1] = 0;
}

+ 0
- 45
src/kem/ntru/ntruhps2048509/clean/sample.c View File

@@ -1,45 +0,0 @@
#include "sample.h"

void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {

PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(f, uniformbytes);
PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(g, uniformbytes + NTRU_SAMPLE_IID_BYTES);
}

void PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]) {

PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(r, uniformbytes);
PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(m, uniformbytes + NTRU_SAMPLE_IID_BYTES);
}


void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(poly *r, const unsigned char u[NTRU_SAMPLE_FT_BYTES]) {
// Assumes NTRU_SAMPLE_FT_BYTES = ceil(30*(n-1)/8)

int32_t s[NTRU_N - 1];
int i;

// Use 30 bits of u per word
for (i = 0; i < (NTRU_N - 1) / 4; i++) {
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
}

for (i = 0; i < NTRU_WEIGHT / 2; i++) {
s[i] |= 1;
}

for (i = NTRU_WEIGHT / 2; i < NTRU_WEIGHT; i++) {
s[i] |= 2;
}

PQCLEAN_NTRUHPS2048509_CLEAN_crypto_sort_int32(s, NTRU_N - 1);

for (i = 0; i < NTRU_N - 1; i++) {
r->coeffs[i] = ((uint16_t) (s[i] & 3));
}

r->coeffs[NTRU_N - 1] = 0;
}

+ 0
- 17
src/kem/ntru/ntruhps2048509/clean/sample.h View File

@@ -1,17 +0,0 @@
#ifndef SAMPLE_H
#define SAMPLE_H

#include "params.h"
#include "poly.h"

#include "crypto_sort_int32.h"

void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);

void PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]);

void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_FT_BYTES]);


#endif

+ 0
- 26
src/kem/ntru/ntruhps2048509/clean/sample_iid.c View File

@@ -1,26 +0,0 @@
#include "sample.h"

static uint16_t mod3(uint16_t a) {
uint16_t r;
int16_t t, c;

r = (a >> 8) + (a & 0xff); // r mod 255 == a mod 255
r = (r >> 4) + (r & 0xf); // r' mod 15 == r mod 15
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3
r = (r >> 2) + (r & 0x3); // r' mod 3 == r mod 3

t = r - 3;
c = t >> 15;

return (c & r) ^ (~c & t);
}

void PQCLEAN_NTRUHPS2048509_CLEAN_sample_iid(poly *r, const unsigned char uniformbytes[NTRU_SAMPLE_IID_BYTES]) {
int i;
/* {0,1,...,255} -> {0,1,2}; Pr[0] = 86/256, Pr[1] = Pr[-1] = 85/256 */
for (i = 0; i < NTRU_N - 1; i++) {
r->coeffs[i] = mod3(uniformbytes[i]);
}

r->coeffs[NTRU_N - 1] = 0;
}

+ 0
- 34
src/kem/ntru/ntruhps2048677/avx2/CMakeLists.txt View File

@@ -1,34 +0,0 @@
set(
SRC_AVX2_NTRUHPS2048677
cmov.c
crypto_sort_int32.c
kem.c
owcpa.c
pack3.c
packq.c
poly.c
poly_lift.c
poly_mod_3_Phi_n.s
poly_mod_q_Phi_n.s
poly_r2_inv.c
poly_r2_mul.s
poly_rq_mul.s
poly_rq_to_s3.s
poly_s3_inv.c
sample.c
sample_iid.c
square_10_677_shufbytes.s
square_1_677_patience.s
square_168_677_shufbytes.s
square_21_677_shufbytes.s
square_2_677_patience.s
square_336_677_shufbytes.s
square_3_677_patience.s
square_42_677_shufbytes.s
square_5_677_patience.s
square_84_677_shufbytes.s
vec32_sample_iid.s
)

define_kem_alg(ntruhps2048677_avx2
PQCLEAN_NTRUHPS2048677_AVX2 "${SRC_AVX2_NTRUHPS2048677}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 0
- 19
src/kem/ntru/ntruhps2048677/avx2/api.h View File

@@ -1,19 +0,0 @@
#ifndef PQCLEAN_NTRUHPS2048677_AVX2_API_H
#define PQCLEAN_NTRUHPS2048677_AVX2_API_H

#include <stdint.h>

#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_SECRETKEYBYTES 1234
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_PUBLICKEYBYTES 930
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_CIPHERTEXTBYTES 930
#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_BYTES 32

#define PQCLEAN_NTRUHPS2048677_AVX2_CRYPTO_ALGNAME "ntruhps2048677"

int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk);

int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk);

int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk);

#endif

+ 0
- 11
src/kem/ntru/ntruhps2048677/avx2/cmov.c View File

@@ -1,11 +0,0 @@
#include "cmov.h"

/* b = 1 means mov, b = 0 means don't mov*/
void PQCLEAN_NTRUHPS2048677_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b) {
size_t i;

b = (~b + 1);
for (i = 0; i < len; i++) {
r[i] ^= b & (x[i] ^ r[i]);
}
}

+ 0
- 10
src/kem/ntru/ntruhps2048677/avx2/cmov.h View File

@@ -1,10 +0,0 @@
#ifndef VERIFY_H
#define VERIFY_H

#include "params.h"

#include <stddef.h>

void PQCLEAN_NTRUHPS2048677_AVX2_cmov(unsigned char *r, const unsigned char *x, size_t len, unsigned char b);

#endif

+ 0
- 1218
src/kem/ntru/ntruhps2048677/avx2/crypto_sort_int32.c
File diff suppressed because it is too large
View File


+ 0
- 11
src/kem/ntru/ntruhps2048677/avx2/crypto_sort_int32.h View File

@@ -1,11 +0,0 @@
#ifndef CRYPTO_SORT
#define CRYPTO_SORT

#include "params.h"

#include <stddef.h>
#include <stdint.h>

void PQCLEAN_NTRUHPS2048677_AVX2_crypto_sort_int32(int32_t *x, size_t n);

#endif

+ 0
- 63
src/kem/ntru/ntruhps2048677/avx2/kem.c View File

@@ -1,63 +0,0 @@
#include "api.h"
#include "cmov.h"
#include "fips202.h"
#include "owcpa.h"
#include "params.h"
#include "randombytes.h"
#include "sample.h"

// API FUNCTIONS
int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) {
uint8_t seed[NTRU_SAMPLE_FG_BYTES];

randombytes(seed, NTRU_SAMPLE_FG_BYTES);
PQCLEAN_NTRUHPS2048677_AVX2_owcpa_keypair(pk, sk, seed);

randombytes(sk + NTRU_OWCPA_SECRETKEYBYTES, NTRU_PRFKEYBYTES);

return 0;
}

int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_enc(uint8_t *c, uint8_t *k, const uint8_t *pk) {
poly r, m;
uint8_t rm[NTRU_OWCPA_MSGBYTES];
uint8_t rm_seed[NTRU_SAMPLE_RM_BYTES];

randombytes(rm_seed, NTRU_SAMPLE_RM_BYTES);

PQCLEAN_NTRUHPS2048677_AVX2_sample_rm(&r, &m, rm_seed);

PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm, &r);
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, &m);
sha3_256(k, rm, NTRU_OWCPA_MSGBYTES);

PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(&r);
PQCLEAN_NTRUHPS2048677_AVX2_owcpa_enc(c, &r, &m, pk);

return 0;
}

int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const uint8_t *sk) {
int i, fail;
uint8_t rm[NTRU_OWCPA_MSGBYTES];
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];

fail = PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(rm, c, sk);
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
/* See comment in PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec for details. */

sha3_256(k, rm, NTRU_OWCPA_MSGBYTES);

/* shake(secret PRF key || input ciphertext) */
for (i = 0; i < NTRU_PRFKEYBYTES; i++) {
buf[i] = sk[i + NTRU_OWCPA_SECRETKEYBYTES];
}
for (i = 0; i < NTRU_CIPHERTEXTBYTES; i++) {
buf[NTRU_PRFKEYBYTES + i] = c[i];
}
sha3_256(rm, buf, NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES);

PQCLEAN_NTRUHPS2048677_AVX2_cmov(k, rm, NTRU_SHAREDKEYBYTES, (unsigned char) fail);

return 0;
}

+ 0
- 183
src/kem/ntru/ntruhps2048677/avx2/owcpa.c View File

@@ -1,183 +0,0 @@
#include "owcpa.h"
#include "poly.h"
#include "sample.h"

static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
/* Check that any unused bits of the final byte are zero. */

uint16_t t = 0;

t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));

/* We have 0 <= t < 256 */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 15));
}

static int owcpa_check_r(const poly *r) {
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */

int i;
uint32_t t = 0;
uint16_t c;
for (i = 0; i < NTRU_N - 1; i++) {
c = r->coeffs[i];
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
}
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */

/* We have 0 <= t < 2^16. */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 31));
}

static int owcpa_check_m(const poly *m) {
/* Check that m is in message space, i.e. */
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
/* Note: We may assume that m has coefficients in {0,1,2}. */

int i;
uint32_t t = 0;
uint16_t ps = 0;
uint16_t ms = 0;
for (i = 0; i < NTRU_N; i++) {
ps += m->coeffs[i] & 1;
ms += m->coeffs[i] & 2;
}
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */

/* We have 0 <= t < 2^16. */
/* Return 0 on success (t=0), 1 on failure */
return (int) (1 & ((~t + 1) >> 31));
}

void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_keypair(unsigned char *pk,
unsigned char *sk,
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]) {
int i;

poly x1, x2, x3, x4, x5;

poly *f = &x1, *g = &x2, *invf_mod3 = &x3;
poly *gf = &x3, *invgf = &x4, *tmp = &x5;
poly *invh = &x3, *h = &x3;

PQCLEAN_NTRUHPS2048677_AVX2_sample_fg(f, g, seed);

PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_inv(invf_mod3, f);
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(sk, f);
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(sk + NTRU_PACK_TRINARY_BYTES, invf_mod3);

/* Lift coeffs of f and g from Z_p to Z_q */
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(f);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(g);


/* g = 3*g */
for (i = 0; i < NTRU_N; i++) {
g->coeffs[i] = 3 * g->coeffs[i];
}

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(gf, g, f);

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_inv(invgf, gf);

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(tmp, invgf, f);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(invh, tmp, f);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(sk + 2 * NTRU_PACK_TRINARY_BYTES, invh);

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(tmp, invgf, g);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(h, tmp, g);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(pk, h);
}


void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_enc(unsigned char *c,
const poly *r,
const poly *m,
const unsigned char *pk) {
int i;
poly x1, x2;
poly *h = &x1, *liftm = &x1;
poly *ct = &x2;

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(h, pk);

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(ct, r, h);

PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(liftm, m);
for (i = 0; i < NTRU_N; i++) {
ct->coeffs[i] = ct->coeffs[i] + liftm->coeffs[i];
}

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(c, ct);
}

int PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(unsigned char *rm,
const unsigned char *ciphertext,
const unsigned char *secretkey) {
int i;
int fail;
poly x1, x2, x3, x4;

poly *c = &x1, *f = &x2, *cf = &x3;
poly *mf = &x2, *finv3 = &x3, *m = &x4;
poly *liftm = &x2, *invh = &x3, *r = &x4;
poly *b = &x1;

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(c, ciphertext);
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(f, secretkey);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(f);

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(cf, c, f);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_to_S3(mf, cf);

PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(finv3, secretkey + NTRU_PACK_TRINARY_BYTES);
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_mul(m, mf, finv3);
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);

fail = 0;

/* Check that the unused bits of the last byte of the ciphertext are zero */
fail |= owcpa_check_ciphertext(ciphertext);

/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
/* (m can take any value in S3 in NTRU_HRSS) */
fail |= owcpa_check_m(m);

/* b = c - Lift(m) mod (q, x^n - 1) */
PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(liftm, m);
for (i = 0; i < NTRU_N; i++) {
b->coeffs[i] = c->coeffs[i] - liftm->coeffs[i];
}

/* r = b / h mod (q, Phi_n) */
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(invh, secretkey + 2 * NTRU_PACK_TRINARY_BYTES);
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(r, b, invh);

/* NOTE: Our definition of r as b/h mod (q, Phi_n) follows Figure 4 of */
/* [Sch18] https://eprint.iacr.org/2018/1174/20181203:032458. */
/* This differs from Figure 10 of Saito--Xagawa--Yamakawa */
/* [SXY17] https://eprint.iacr.org/2017/1005/20180516:055500 */
/* where r gets a final reduction modulo p. */
/* We need this change to use Proposition 1 of [Sch18]. */

/* Proposition 1 of [Sch18] shows that re-encryption with (r,m) yields c. */
/* if and only if fail==0 after the following call to owcpa_check_r */
/* The procedure given in Fig. 8 of [Sch18] can be skipped because we have */
/* c(1) = 0 due to the use of poly_Rq_sum_zero_{to,from}bytes. */
fail |= owcpa_check_r(r);

PQCLEAN_NTRUHPS2048677_AVX2_poly_trinary_Zq_to_Z3(r);
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm, r);

return fail;
}

+ 0
- 19
src/kem/ntru/ntruhps2048677/avx2/owcpa.h View File

@@ -1,19 +0,0 @@
#ifndef OWCPA_H
#define OWCPA_H

#include "params.h"
#include "poly.h"

void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_keypair(unsigned char *pk,
unsigned char *sk,
const unsigned char seed[NTRU_SAMPLE_FG_BYTES]);

void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_enc(unsigned char *c,
const poly *r,
const poly *m,
const unsigned char *pk);

int PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(unsigned char *rm,
const unsigned char *ciphertext,
const unsigned char *secretkey);
#endif

+ 0
- 46
src/kem/ntru/ntruhps2048677/avx2/pack3.c View File

@@ -1,46 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_OWCPA_MSGBYTES], const poly *a) {
int i;
unsigned char c;
int j;

for (i = 0; i < NTRU_PACK_DEG / 5; i++) {
c = a->coeffs[5 * i + 4] & 255;
c = (3 * c + a->coeffs[5 * i + 3]) & 255;
c = (3 * c + a->coeffs[5 * i + 2]) & 255;
c = (3 * c + a->coeffs[5 * i + 1]) & 255;
c = (3 * c + a->coeffs[5 * i + 0]) & 255;
msg[i] = c;
}
i = NTRU_PACK_DEG / 5;
c = 0;
for (j = NTRU_PACK_DEG - (5 * i) - 1; j >= 0; j--) {
c = (3 * c + a->coeffs[5 * i + j]) & 255;
}
msg[i] = c;
}

void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_OWCPA_MSGBYTES]) {
int i;
unsigned char c;
int j;

for (i = 0; i < NTRU_PACK_DEG / 5; i++) {
c = msg[i];
r->coeffs[5 * i + 0] = c;
r->coeffs[5 * i + 1] = c * 171 >> 9; // this is division by 3
r->coeffs[5 * i + 2] = c * 57 >> 9; // division by 3^2
r->coeffs[5 * i + 3] = c * 19 >> 9; // division by 3^3
r->coeffs[5 * i + 4] = c * 203 >> 14; // etc.
}
i = NTRU_PACK_DEG / 5;
c = msg[i];
for (j = 0; (5 * i + j) < NTRU_PACK_DEG; j++) {
r->coeffs[5 * i + j] = c;
c = c * 171 >> 9;
}
r->coeffs[NTRU_N - 1] = 0;
PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_3_Phi_n(r);
}


+ 0
- 93
src/kem/ntru/ntruhps2048677/avx2/packq.c View File

@@ -1,93 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a) {
int i, j;
uint16_t t[8];

for (i = 0; i < NTRU_PACK_DEG / 8; i++) {
for (j = 0; j < 8; j++) {
t[j] = MODQ(a->coeffs[8 * i + j]);
}

r[11 * i + 0] = (unsigned char) ( t[0] & 0xff);
r[11 * i + 1] = (unsigned char) ((t[0] >> 8) | ((t[1] & 0x1f) << 3));
r[11 * i + 2] = (unsigned char) ((t[1] >> 5) | ((t[2] & 0x03) << 6));
r[11 * i + 3] = (unsigned char) ((t[2] >> 2) & 0xff);
r[11 * i + 4] = (unsigned char) ((t[2] >> 10) | ((t[3] & 0x7f) << 1));
r[11 * i + 5] = (unsigned char) ((t[3] >> 7) | ((t[4] & 0x0f) << 4));
r[11 * i + 6] = (unsigned char) ((t[4] >> 4) | ((t[5] & 0x01) << 7));
r[11 * i + 7] = (unsigned char) ((t[5] >> 1) & 0xff);
r[11 * i + 8] = (unsigned char) ((t[5] >> 9) | ((t[6] & 0x3f) << 2));
r[11 * i + 9] = (unsigned char) ((t[6] >> 6) | ((t[7] & 0x07) << 5));
r[11 * i + 10] = (unsigned char) ((t[7] >> 3));
}

for (j = 0; j < NTRU_PACK_DEG - 8 * i; j++) {
t[j] = MODQ(a->coeffs[8 * i + j]);
}
for (; j < 8; j++) {
t[j] = 0;
}

switch (NTRU_PACK_DEG & 0x07) {
// cases 0 and 6 are impossible since 2 generates (Z/n)* and
// p mod 8 in {1, 7} implies that 2 is a quadratic residue.
case 4:
r[11 * i + 0] = (unsigned char) (t[0] & 0xff);
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3);
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6);
r[11 * i + 3] = (unsigned char) (t[2] >> 2) & 0xff;
r[11 * i + 4] = (unsigned char) (t[2] >> 10) | ((t[3] & 0x7f) << 1);
r[11 * i + 5] = (unsigned char) (t[3] >> 7) | ((t[4] & 0x0f) << 4);
break;
case 2:
r[11 * i + 0] = (unsigned char) (t[0] & 0xff);
r[11 * i + 1] = (unsigned char) (t[0] >> 8) | ((t[1] & 0x1f) << 3);
r[11 * i + 2] = (unsigned char) (t[1] >> 5) | ((t[2] & 0x03) << 6);
break;
}
}

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a) {
int i;
for (i = 0; i < NTRU_PACK_DEG / 8; i++) {
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10);
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7);
r->coeffs[8 * i + 4] = (a[11 * i + 5] >> 4) | (((uint16_t)a[11 * i + 6] & 0x7f) << 4);
r->coeffs[8 * i + 5] = (a[11 * i + 6] >> 7) | (((uint16_t)a[11 * i + 7] & 0xff) << 1) | (((uint16_t)a[11 * i + 8] & 0x03) << 9);
r->coeffs[8 * i + 6] = (a[11 * i + 8] >> 2) | (((uint16_t)a[11 * i + 9] & 0x1f) << 6);
r->coeffs[8 * i + 7] = (a[11 * i + 9] >> 5) | (((uint16_t)a[11 * i + 10] & 0xff) << 3);
}
switch (NTRU_PACK_DEG & 0x07) {
// cases 0 and 6 are impossible since 2 generates (Z/n)* and
// p mod 8 in {1, 7} implies that 2 is a quadratic residue.
case 4:
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
r->coeffs[8 * i + 2] = (a[11 * i + 2] >> 6) | (((uint16_t)a[11 * i + 3] & 0xff) << 2) | (((uint16_t)a[11 * i + 4] & 0x01) << 10);
r->coeffs[8 * i + 3] = (a[11 * i + 4] >> 1) | (((uint16_t)a[11 * i + 5] & 0x0f) << 7);
break;
case 2:
r->coeffs[8 * i + 0] = (a[11 * i + 0] >> 0) | (((uint16_t)a[11 * i + 1] & 0x07) << 8);
r->coeffs[8 * i + 1] = (a[11 * i + 1] >> 3) | (((uint16_t)a[11 * i + 2] & 0x3f) << 5);
break;
}
r->coeffs[NTRU_N - 1] = 0;
}

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a) {
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(r, a);
}

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a) {
int i;
PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(r, a);

/* Set r[n-1] so that the sum of coefficients is zero mod q */
r->coeffs[NTRU_N - 1] = 0;
for (i = 0; i < NTRU_PACK_DEG; i++) {
r->coeffs[NTRU_N - 1] -= r->coeffs[i];
}
}

+ 0
- 37
src/kem/ntru/ntruhps2048677/avx2/params.h View File

@@ -1,37 +0,0 @@
#ifndef PARAMS_H
#define PARAMS_H

#define NTRU_HPS
#define NTRU_N 677
#define NTRU_LOGQ 11


/* Do not modify below this line */

#define PAD32(X) ((((X) + 31)/32)*32)

#define NTRU_Q (1 << NTRU_LOGQ)
#define NTRU_WEIGHT (NTRU_Q/8 - 2)

#define NTRU_SEEDBYTES 32
#define NTRU_PRFKEYBYTES 32
#define NTRU_SHAREDKEYBYTES 32

#define NTRU_SAMPLE_IID_BYTES (NTRU_N-1)
#define NTRU_SAMPLE_FT_BYTES ((30*(NTRU_N-1)+7)/8)
#define NTRU_SAMPLE_FG_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES)
#define NTRU_SAMPLE_RM_BYTES (NTRU_SAMPLE_IID_BYTES+NTRU_SAMPLE_FT_BYTES)

#define NTRU_PACK_DEG (NTRU_N-1)
#define NTRU_PACK_TRINARY_BYTES ((NTRU_PACK_DEG+4)/5)

#define NTRU_OWCPA_MSGBYTES (2*NTRU_PACK_TRINARY_BYTES)
#define NTRU_OWCPA_PUBLICKEYBYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8)
#define NTRU_OWCPA_SECRETKEYBYTES (2*NTRU_PACK_TRINARY_BYTES + NTRU_OWCPA_PUBLICKEYBYTES)
#define NTRU_OWCPA_BYTES ((NTRU_LOGQ*NTRU_PACK_DEG+7)/8)

#define NTRU_PUBLICKEYBYTES (NTRU_OWCPA_PUBLICKEYBYTES)
#define NTRU_SECRETKEYBYTES (NTRU_OWCPA_SECRETKEYBYTES + NTRU_PRFKEYBYTES)
#define NTRU_CIPHERTEXTBYTES (NTRU_OWCPA_BYTES)

#endif

+ 0
- 75
src/kem/ntru/ntruhps2048677/avx2/poly.c View File

@@ -1,75 +0,0 @@
#include "poly.h"

/* Map {0, 1, 2} -> {0,1,q-1} in place */
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = r->coeffs[i] | ((-(r->coeffs[i] >> 1)) & (NTRU_Q - 1));
}
}

/* Map {0, 1, q-1} -> {0,1,2} in place */
void PQCLEAN_NTRUHPS2048677_AVX2_poly_trinary_Zq_to_Z3(poly *r) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = MODQ(r->coeffs[i]);
r->coeffs[i] = 3 & (r->coeffs[i] ^ (r->coeffs[i] >> (NTRU_LOGQ - 1)));
}
}

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b) {
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, a, b);
PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_q_Phi_n(r);
}

void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b) {
int i;

/* Our S3 multiplications do not overflow mod q, */
/* so we can re-purpose PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul, as long as we */
/* follow with an explicit reduction mod q. */
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, a, b);
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = MODQ(r->coeffs[i]);
}
PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_3_Phi_n(r);
}

static void PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv_to_Rq_inv(poly *r, const poly *ai, const poly *a) {

int i;
poly b, c;
poly s;

// for 0..4
// ai = ai * (2 - a*ai) mod q
for (i = 0; i < NTRU_N; i++) {
b.coeffs[i] = -(a->coeffs[i]);
}

for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = ai->coeffs[i];
}

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, r, &b);
c.coeffs[0] += 2; // c = 2 - a*ai
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&s, &c, r); // s = ai*c

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, &s, &b);
c.coeffs[0] += 2; // c = 2 - a*s
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, r, &b);
c.coeffs[0] += 2; // c = 2 - a*r
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&s, &c, r); // s = r*c

PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(&c, &s, &b);
c.coeffs[0] += 2; // c = 2 - a*s
PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(r, &c, &s); // r = s*c
}

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_inv(poly *r, const poly *a) {
poly ai2;
PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv(&ai2, a);
PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv_to_Rq_inv(r, &ai2, a);
}

+ 0
- 41
src/kem/ntru/ntruhps2048677/avx2/poly.h View File

@@ -1,41 +0,0 @@
#ifndef POLY_H
#define POLY_H

#include <immintrin.h>
#include <stdint.h>

#include "params.h"

#define MODQ(X) ((X) & (NTRU_Q-1))

typedef union { /* align to 32 byte boundary for vmovdqa */
uint16_t coeffs[PAD32(NTRU_N)];
__m256i coeffs_x16[PAD32(NTRU_N) / 16];
} poly;

void PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_3_Phi_n(poly *r);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_mod_q_Phi_n(poly *r);

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_tobytes(unsigned char *r, const poly *a);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_frombytes(poly *r, const unsigned char *a);

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_tobytes(unsigned char *r, const poly *a);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_sum_zero_frombytes(poly *r, const unsigned char *a);

void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(unsigned char msg[NTRU_PACK_TRINARY_BYTES], const poly *a);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_frombytes(poly *r, const unsigned char msg[NTRU_PACK_TRINARY_BYTES]);

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Sq_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_mul(poly *r, const poly *a, const poly *b);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_to_S3(poly *r, const poly *a);

void PQCLEAN_NTRUHPS2048677_AVX2_poly_R2_inv(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_Rq_inv(poly *r, const poly *a);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_inv(poly *r, const poly *a);

void PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(poly *r);
void PQCLEAN_NTRUHPS2048677_AVX2_poly_trinary_Zq_to_Z3(poly *r);

#endif

+ 0
- 11
src/kem/ntru/ntruhps2048677/avx2/poly_lift.c View File

@@ -1,11 +0,0 @@
#include "poly.h"

void PQCLEAN_NTRUHPS2048677_AVX2_poly_lift(poly *r, const poly *a) {
int i;
for (i = 0; i < NTRU_N; i++) {
r->coeffs[i] = a->coeffs[i];
}
PQCLEAN_NTRUHPS2048677_AVX2_poly_Z3_to_Zq(r);
}



Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save