Преглед на файлове

adds HQC-RMRS-128/192/256

kyber
Henry Case преди 3 години
родител
ревизия
fddd697fc4
променени са 100 файла, в които са добавени 10301 реда и са изтрити 111 реда
  1. +9
    -0
      CMakeLists.txt
  2. +1
    -0
      README.md
  3. +4
    -1
      public/pqc/pqc.h
  4. +6
    -0
      src/capi/pqapi.c
  5. +0
    -17
      src/common/cpucycles.c
  6. +0
    -33
      src/common/cpucycles.h
  7. +0
    -51
      src/common/speed_print.c
  8. +0
    -9
      src/common/speed_print.h
  9. +16
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/CMakeLists.txt
  10. +25
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/api.h
  11. +47
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/code.c
  12. +18
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/code.h
  13. +351
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/fft.c
  14. +18
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/fft.h
  15. +176
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/gf.c
  16. +69
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/gf.h
  17. +369
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/gf2x.c
  18. +21
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/gf2x.h
  19. +168
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/hqc.c
  20. +19
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/hqc.h
  21. +140
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/kem.c
  22. +111
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/parameters.h
  23. +186
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/parsing.c
  24. +36
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/parsing.h
  25. +389
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.c
  26. +18
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.h
  27. +466
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.c
  28. +20
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.h
  29. +178
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/vector.c
  30. +27
    -0
      src/kem/hqc/hqc-rmrs-128/avx2/vector.h
  31. +16
    -0
      src/kem/hqc/hqc-rmrs-128/clean/CMakeLists.txt
  32. +25
    -0
      src/kem/hqc/hqc-rmrs-128/clean/api.h
  33. +46
    -0
      src/kem/hqc/hqc-rmrs-128/clean/code.c
  34. +18
    -0
      src/kem/hqc/hqc-rmrs-128/clean/code.h
  35. +351
    -0
      src/kem/hqc/hqc-rmrs-128/clean/fft.c
  36. +18
    -0
      src/kem/hqc/hqc-rmrs-128/clean/fft.h
  37. +63
    -0
      src/kem/hqc/hqc-rmrs-128/clean/gf.c
  38. +39
    -0
      src/kem/hqc/hqc-rmrs-128/clean/gf.h
  39. +154
    -0
      src/kem/hqc/hqc-rmrs-128/clean/gf2x.c
  40. +16
    -0
      src/kem/hqc/hqc-rmrs-128/clean/gf2x.h
  41. +144
    -0
      src/kem/hqc/hqc-rmrs-128/clean/hqc.c
  42. +19
    -0
      src/kem/hqc/hqc-rmrs-128/clean/hqc.h
  43. +140
    -0
      src/kem/hqc/hqc-rmrs-128/clean/kem.c
  44. +98
    -0
      src/kem/hqc/hqc-rmrs-128/clean/parameters.h
  45. +186
    -0
      src/kem/hqc/hqc-rmrs-128/clean/parsing.c
  46. +36
    -0
      src/kem/hqc/hqc-rmrs-128/clean/parsing.h
  47. +237
    -0
      src/kem/hqc/hqc-rmrs-128/clean/reed_muller.c
  48. +18
    -0
      src/kem/hqc/hqc-rmrs-128/clean/reed_muller.h
  49. +349
    -0
      src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.c
  50. +20
    -0
      src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.h
  51. +176
    -0
      src/kem/hqc/hqc-rmrs-128/clean/vector.c
  52. +27
    -0
      src/kem/hqc/hqc-rmrs-128/clean/vector.h
  53. +16
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/CMakeLists.txt
  54. +25
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/api.h
  55. +47
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/code.c
  56. +18
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/code.h
  57. +351
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/fft.c
  58. +18
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/fft.h
  59. +176
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/gf.c
  60. +69
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/gf.h
  61. +408
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/gf2x.c
  62. +21
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/gf2x.h
  63. +168
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/hqc.c
  64. +19
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/hqc.h
  65. +140
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/kem.c
  66. +109
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/parameters.h
  67. +186
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/parsing.c
  68. +36
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/parsing.h
  69. +389
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.c
  70. +18
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.h
  71. +476
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.c
  72. +20
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.h
  73. +178
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/vector.c
  74. +27
    -0
      src/kem/hqc/hqc-rmrs-192/avx2/vector.h
  75. +16
    -0
      src/kem/hqc/hqc-rmrs-192/clean/CMakeLists.txt
  76. +25
    -0
      src/kem/hqc/hqc-rmrs-192/clean/api.h
  77. +46
    -0
      src/kem/hqc/hqc-rmrs-192/clean/code.c
  78. +18
    -0
      src/kem/hqc/hqc-rmrs-192/clean/code.h
  79. +351
    -0
      src/kem/hqc/hqc-rmrs-192/clean/fft.c
  80. +18
    -0
      src/kem/hqc/hqc-rmrs-192/clean/fft.h
  81. +63
    -0
      src/kem/hqc/hqc-rmrs-192/clean/gf.c
  82. +39
    -0
      src/kem/hqc/hqc-rmrs-192/clean/gf.h
  83. +154
    -0
      src/kem/hqc/hqc-rmrs-192/clean/gf2x.c
  84. +16
    -0
      src/kem/hqc/hqc-rmrs-192/clean/gf2x.h
  85. +144
    -0
      src/kem/hqc/hqc-rmrs-192/clean/hqc.c
  86. +19
    -0
      src/kem/hqc/hqc-rmrs-192/clean/hqc.h
  87. +140
    -0
      src/kem/hqc/hqc-rmrs-192/clean/kem.c
  88. +98
    -0
      src/kem/hqc/hqc-rmrs-192/clean/parameters.h
  89. +186
    -0
      src/kem/hqc/hqc-rmrs-192/clean/parsing.c
  90. +36
    -0
      src/kem/hqc/hqc-rmrs-192/clean/parsing.h
  91. +237
    -0
      src/kem/hqc/hqc-rmrs-192/clean/reed_muller.c
  92. +18
    -0
      src/kem/hqc/hqc-rmrs-192/clean/reed_muller.h
  93. +349
    -0
      src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.c
  94. +20
    -0
      src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.h
  95. +176
    -0
      src/kem/hqc/hqc-rmrs-192/clean/vector.c
  96. +27
    -0
      src/kem/hqc/hqc-rmrs-192/clean/vector.h
  97. +16
    -0
      src/kem/hqc/hqc-rmrs-256/avx2/CMakeLists.txt
  98. +25
    -0
      src/kem/hqc/hqc-rmrs-256/avx2/api.h
  99. +47
    -0
      src/kem/hqc/hqc-rmrs-256/avx2/code.c
  100. +18
    -0
      src/kem/hqc/hqc-rmrs-256/avx2/code.h

+ 9
- 0
CMakeLists.txt Целия файл

@@ -141,6 +141,9 @@ add_subdirectory(src/kem/ntru/ntruhps2048677/clean)
add_subdirectory(src/kem/ntru_prime/ntrulpr761/clean)
add_subdirectory(src/kem/ntru_prime/ntrulpr653/clean)
add_subdirectory(src/kem/ntru_prime/ntrulpr857/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-128/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-192/clean)
add_subdirectory(src/kem/hqc/hqc-rmrs-256/clean)

# Hardware optimized targets
if(${ARCH} STREQUAL "ARCH_x86_64")
@@ -196,8 +199,13 @@ add_subdirectory(src/kem/ntru/ntruhps2048677/avx2)
add_subdirectory(src/kem/ntru_prime/ntrulpr761/avx2)
add_subdirectory(src/kem/ntru_prime/ntrulpr653/avx2)
add_subdirectory(src/kem/ntru_prime/ntrulpr857/avx2)
add_subdirectory(src/kem/hqc/hqc-rmrs-128/avx2)
add_subdirectory(src/kem/hqc/hqc-rmrs-192/avx2)
add_subdirectory(src/kem/hqc/hqc-rmrs-256/avx2)
endif()



# The rest of the library
set(SRC_COMMON_GENERIC
src/common/aes.c
@@ -205,6 +213,7 @@ set(SRC_COMMON_GENERIC
src/common/sp800-185.c
src/common/randombytes.c
src/common/sha2.c
src/common/nistseedexpander.c
src/capi/pqapi.c
)



+ 1
- 0
README.md Целия файл

@@ -13,6 +13,7 @@ Users shouldn't expect any level of security provided by this code. The library
| SABER | 3 | x |
| FrodoKEM | 3 | |
| NTRU Prime | 3 | x |
| HQC-RMRS | 3 | x |
| Dilithium | 3 | x |
| Falcon | 2 | |
| Rainbow | 3 | |


+ 4
- 1
public/pqc/pqc.h Целия файл

@@ -60,7 +60,10 @@ extern "C" {
_(NTRULPR857) \
_(LIGHTSABER) \
_(FIRESABER) \
_(SABER)
_(SABER) \
_(HQCRMRS128) \
_(HQCRMRS192) \
_(HQCRMRS256)

// Defines IDs for each algorithm. The
// PQC_ALG_SIG/KEM_MAX indicates number


+ 6
- 0
src/capi/pqapi.c Целия файл

@@ -113,6 +113,12 @@
#include "kem/saber/firesaber/avx2/api.h"
#include "kem/saber/saber/clean/api.h"
#include "kem/saber/saber/avx2/api.h"
#include "kem/hqc/hqc-rmrs-128/clean/api.h"
#include "kem/hqc/hqc-rmrs-192/clean/api.h"
#include "kem/hqc/hqc-rmrs-256/clean/api.h"
#include "kem/hqc/hqc-rmrs-128/avx2/api.h"
#include "kem/hqc/hqc-rmrs-192/avx2/api.h"
#include "kem/hqc/hqc-rmrs-256/avx2/api.h"

// not proud of this thingy
#define OPT_VERSION _CLEAN_


+ 0
- 17
src/common/cpucycles.c Целия файл

@@ -1,17 +0,0 @@
#include <stdint.h>
#include "cpucycles.h"

/**
 * Estimates the fixed cost of a single cpucycles() measurement.
 *
 * Performs 100000 pairs of back-to-back cpucycles() readings and keeps the
 * smallest difference observed between the two readings of a pair.
 *
 * @returns the smallest observed difference between two adjacent readings
 */
uint64_t cpucycles_overhead(void) {
    uint64_t best = UINT64_MAX;

    for (unsigned int round = 0; round < 100000; ++round) {
        const uint64_t start = cpucycles();
        /* Empty asm statement kept between the two readings, as in the measured code path. */
        __asm__ volatile ("");
        const uint64_t stop = cpucycles();
        const uint64_t delta = stop - start;

        if (delta < best) {
            best = delta;
        }
    }

    return best;
}

+ 0
- 33
src/common/cpucycles.h Целия файл

@@ -1,33 +0,0 @@
#ifndef CPUCYCLES_H
#define CPUCYCLES_H

#include <stdint.h>

#ifdef USE_RDPMC /* Needs echo 2 > /sys/devices/cpu/rdpmc */

/*
 * Reads a counter with the RDPMC instruction and returns it as one 64-bit value.
 * ECX is loaded with (1 << 30) + 1 to select the counter to read.
 * NOTE(review): presumably bit 30 selects the fixed-function counters and index 1
 * is the core-cycle counter -- confirm against the CPU documentation.
 */
static inline uint64_t cpucycles(void) {
const uint32_t ecx = (1U << 30) + 1;
uint64_t result;

/* The asm shifts RDX left by 32 bits and ORs it into RAX, which is the output operand. */
__asm__ volatile ("rdpmc; shlq $32,%%rdx; orq %%rdx,%%rax"
: "=a" (result) : "c" (ecx) : "rdx");

return result;
}

#else

/*
 * Reads the time-stamp counter with the RDTSC instruction and returns it as one
 * 64-bit value. This variant is compiled when USE_RDPMC is not defined.
 */
static inline uint64_t cpucycles(void) {
uint64_t result;

/* The asm shifts RDX left by 32 bits and ORs it into RAX, which is the output operand. */
__asm__ volatile ("rdtsc; shlq $32,%%rdx; orq %%rdx,%%rax"
: "=a" (result) : : "%rdx");

return result;
}

#endif

uint64_t cpucycles_overhead(void);

#endif

+ 0
- 51
src/common/speed_print.c Целия файл

@@ -1,51 +0,0 @@
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "cpucycles.h"
#include "speed_print.h"

/* qsort() comparator: orders two uint64_t values in ascending order. */
static int cmp_uint64(const void *a, const void *b) {
    const uint64_t lhs = *(const uint64_t *)a;
    const uint64_t rhs = *(const uint64_t *)b;

    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}

/*
 * Sorts l in place (ascending) and returns its median.
 *
 * For an odd number of elements the middle element is returned; for an even
 * number, the mean of the two middle elements rounded down. An empty array
 * yields 0.
 *
 * l    array of llen values; reordered by this call
 * llen number of elements in l
 */
static uint64_t median(uint64_t *l, size_t llen) {
    if (llen == 0) {
        /* Nothing to measure; also avoids indexing l[llen / 2 - 1] with a wrapped index. */
        return 0;
    }

    qsort(l, llen, sizeof l[0], cmp_uint64);

    if (llen % 2) {
        return l[llen / 2];
    }

    /* l is sorted, so hi >= lo: lo + (hi - lo) / 2 equals floor((lo + hi) / 2)
     * without the wrap-around that lo + hi suffers for large counts. */
    const uint64_t lo = l[llen / 2 - 1];
    const uint64_t hi = l[llen / 2];
    return lo + (hi - lo) / 2;
}

/*
 * Returns the arithmetic mean (rounded down) of the tlen values in t.
 * The sum is accumulated in a uint64_t; tlen must be non-zero.
 */
static uint64_t average(uint64_t *t, size_t tlen) {
    uint64_t total = 0;
    const uint64_t *const end = t + tlen;

    for (const uint64_t *cur = t; cur != end; ++cur) {
        total += *cur;
    }

    return total / tlen;
}

/*
 * Prints the median and average of consecutive differences of cycle counts.
 *
 * t holds tlen raw counter readings. They are overwritten in place with the
 * tlen - 1 differences t[i+1] - t[i], each reduced by the measurement overhead
 * obtained once from cpucycles_overhead() and cached for later calls.
 * median() additionally reorders those differences.
 *
 * s    label printed before the statistics
 * t    array of tlen counter readings; modified by this call
 * tlen number of readings; at least two are required, otherwise an error is
 *      written to stderr and nothing else happens
 */
void print_results(const char *s, uint64_t *t, size_t tlen) {
    /* (uint64_t)-1 marks "overhead not measured yet". */
    static uint64_t overhead = (uint64_t)-1;
    size_t i;

    if (tlen < 2) {
        fprintf(stderr, "ERROR: Need at least two cycle counts!\n");
        return;
    }

    if (overhead == (uint64_t)-1) {
        overhead = cpucycles_overhead();
    }

    /* tlen readings give tlen - 1 intervals. */
    tlen--;
    for (i = 0; i < tlen; ++i) {
        t[i] = t[i + 1] - t[i] - overhead;
    }

    printf("%s\n", s);
    printf("median: %llu cycles/ticks\n", (unsigned long long)median(t, tlen));
    printf("average: %llu cycles/ticks\n", (unsigned long long)average(t, tlen));
    printf("\n");
}

+ 0
- 9
src/common/speed_print.h Целия файл

@@ -1,9 +0,0 @@
#ifndef PRINT_SPEED_H
#define PRINT_SPEED_H

#include <stddef.h>
#include <stdint.h>

void print_results(const char *s, uint64_t *t, size_t tlen);

#endif

+ 16
- 0
src/kem/hqc/hqc-rmrs-128/avx2/CMakeLists.txt Целия файл

@@ -0,0 +1,16 @@
# Sources of the AVX2 implementation of HQC-RMRS-128.
set(
  SRC_AVX2_HQCRMRS128
  code.c
  fft.c
  gf2x.c
  gf.c
  hqc.c
  kem.c
  parsing.c
  reed_muller.c
  reed_solomon.c
  vector.c
)

# Register the AVX2 target under the AVX2 symbol namespace: every function in
# this directory is prefixed PQCLEAN_HQCRMRS128_AVX2_, not ..._CLEAN_.
# NOTE(review): define_kem_alg is declared outside this file -- confirm how it
# uses the namespace argument.
define_kem_alg(hqcrmrs128_avx2
  PQCLEAN_HQCRMRS128_AVX2 "${SRC_AVX2_HQCRMRS128}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 25
- 0
src/kem/hqc/hqc-rmrs-128/avx2/api.h Целия файл

@@ -0,0 +1,25 @@
#ifndef PQCLEAN_HQCRMRS128_AVX2_API_H
#define PQCLEAN_HQCRMRS128_AVX2_API_H
/**
* @file api.h
* @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
*/

#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_ALGNAME "HQC-RMRS-128"

#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES 2289
#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES 2249
#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES 64
#define PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES 4481

// As a technicality, the public key is appended to the secret key in order to respect the NIST API.
// Without this constraint, PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 40
// (2289 - 2249, i.e. the secret-key bytes that remain once the appended public key is removed).

int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


#endif

+ 47
- 0
src/kem/hqc/hqc-rmrs-128/avx2/code.c Целия файл

@@ -0,0 +1,47 @@
#include "code.h"
#include "parameters.h"
#include "reed_muller.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <string.h>
/**
* @file code.c
* @brief Implementation of concatenated code
*/



/**
 * @brief Encodes a message into a code word of the concatenated code
 *
 * The message is first passed through the Reed-Solomon encoder; the resulting
 * intermediate word is then encoded with the duplicated Reed-Muller code.
 *
 * @param[out] em Pointer to an array receiving the concatenated code word
 * @param[in] m Pointer to an array holding the message
 */
void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *m) {
    uint8_t rs_word[8 * VEC_N1_SIZE_64];

    /* Start from an all-zero intermediate buffer before the outer encoder fills it. */
    memset(rs_word, 0, sizeof rs_word);

    PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(rs_word, m);
    PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(em, rs_word);
}



/**
 * @brief Decodes a code word of the concatenated code back into a message
 *
 * The Reed-Muller decoder runs first; its output is then handed to the
 * Reed-Solomon decoder, which writes the message.
 *
 * @param[out] m Pointer to an array receiving the message
 * @param[in] em Pointer to an array holding the code word
 */
void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em) {
    uint8_t rs_word[8 * VEC_N1_SIZE_64];

    /* Start from an all-zero intermediate buffer before the inner decoder fills it. */
    memset(rs_word, 0, sizeof rs_word);

    PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(rs_word, em);
    PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(m, rs_word);
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-128/avx2/code.h Целия файл

@@ -0,0 +1,18 @@
#ifndef CODE_H
#define CODE_H


/**
* @file code.h
* Header file of code.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

void PQCLEAN_HQCRMRS128_AVX2_code_encode(uint8_t *em, const uint8_t *message);

void PQCLEAN_HQCRMRS128_AVX2_code_decode(uint8_t *m, const uint8_t *em);


#endif

+ 351
- 0
src/kem/hqc/hqc-rmrs-128/avx2/fft.c Целия файл

@@ -0,0 +1,351 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
#include <string.h>
/**
* @file fft.c
* Implementation of the additive FFT and its transpose.
* This implementation is based on the paper from Gao and Mateer: <br>
* Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
* IEEE Transactions on Information Theory 56 (2010), 6265--6272.
* http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
* and includes improvements proposed by Bernstein, Chou and Schwabe here:
* https://binary.cr.yp.to/mcbits-20130616.pdf
*/


static void compute_fft_betas(uint16_t *betas);
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


/**
 * @brief Fills betas with the FFT basis elements, leaving out the element 1
 *
 * The entries are the single-bit values 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2,
 * written in that order.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
static void compute_fft_betas(uint16_t *betas) {
    size_t shift = PARAM_M - 1;

    while (shift > 0) {
        *betas++ = (uint16_t) (1 << shift);
        --shift;
    }
}



/**
 * @brief Computes every subset sum (XOR) of the given set
 *
 * Entry i of subset_sums is the XOR of the set elements selected by the bits
 * of i: bit b of i selects set[b].
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    size_t filled = 1;

    subset_sums[0] = 0;

    /* Each new element doubles the table: the upper half is the lower half XOR that element. */
    for (uint16_t bit = 0; bit < set_size; ++bit) {
        const uint16_t elem = set[bit];
        uint16_t *const upper = subset_sums + filled;

        for (size_t low = 0; low < filled; ++low) {
            upper[low] = elem ^ subset_sums[low];
        }
        filled <<= 1;
    }
}



/**
* @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
*
* Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
* as proposed by Bernstein, Chou and Schwabe:
* https://binary.cr.yp.to/mcbits-20130616.pdf
*
* The cases m_f = 1..4 are written out explicitly; any other value of m_f is
* delegated to radix_big.
*
* @param[out] f0 Array half the size of f
* @param[out] f1 Array half the size of f
* @param[in] f Array of size a power of 2
* @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
*/
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
switch (m_f) {
case 4:
// 16 input coefficients -> 8 + 8 outputs.
// Statement order matters: several lines read f0/f1 entries written just above.
f0[4] = f[8] ^ f[12];
f0[6] = f[12] ^ f[14];
f0[7] = f[14] ^ f[15];
f1[5] = f[11] ^ f[13];
f1[6] = f[13] ^ f[14];
f1[7] = f[15];
f0[5] = f[10] ^ f[12] ^ f1[5];
f1[4] = f[9] ^ f[13] ^ f0[5];

f0[0] = f[0];
f1[3] = f[7] ^ f[11] ^ f[15];
f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
f1[2] = f[3] ^ f1[1] ^ f0[3];
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

case 3:
// 8 input coefficients -> 4 + 4 outputs.
f0[0] = f[0];
f0[2] = f[4] ^ f[6];
f0[3] = f[6] ^ f[7];
f1[1] = f[3] ^ f[5] ^ f[7];
f1[2] = f[5] ^ f[6];
f1[3] = f[7];
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

case 2:
// 4 input coefficients -> 2 + 2 outputs.
f0[0] = f[0];
f0[1] = f[2] ^ f[3];
f1[0] = f[1] ^ f0[1];
f1[1] = f[3];
break;

case 1:
// 2 input coefficients: f0 is the constant term, f1 the linear term.
f0[0] = f[0];
f1[0] = f[1];
break;

default:
// Every other size is handled by radix_big.
radix_big(f0, f1, f, m_f);
break;
}
}

/**
 * @brief Radix conversion for the sizes radix() does not handle explicitly
 *
 * Splits the 2^m_f coefficients of f into quarters of n = 2^(m_f-2) entries,
 * builds two half-size polynomials Q and R from them, converts each with
 * radix() at size m_f - 1, and interleaves the results: f0 = (R0, Q0) and
 * f1 = (R1, Q1).
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of 2^m_f coefficients
 * @param[in] m_f log2 of the number of coefficients of f
 */
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R1[1 << (PARAM_FFT - 2)] = {0};

    const size_t quarter = (size_t) 1 << (m_f - 2);
    const size_t quarter_bytes = quarter * sizeof(uint16_t);
    const uint16_t *const third_quarter = f + 2 * quarter;
    const uint16_t *const fourth_quarter = f + 3 * quarter;

    /* Both halves of Q start as the top quarter of f; R starts as the lower half of f. */
    memcpy(Q, fourth_quarter, quarter_bytes);
    memcpy(Q + quarter, fourth_quarter, quarter_bytes);
    memcpy(R, f, 2 * quarter_bytes);

    for (size_t idx = 0; idx < quarter; ++idx) {
        Q[idx] ^= third_quarter[idx];
        R[quarter + idx] ^= Q[idx];
    }

    radix(Q0, Q1, Q, m_f - 1);
    radix(R0, R1, R, m_f - 1);

    memcpy(f0, R0, quarter_bytes);
    memcpy(f0 + quarter, Q0, quarter_bytes);
    memcpy(f1, R1, quarter_bytes);
    memcpy(f1 + quarter, Q1, quarter_bytes);
}



/**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS128_AVX2_fft.
 * When the last beta differs from 1, f is rescaled in place (Step 2), so the
 * caller's array is modified.
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in,out] f Array of 2^m_f coefficients
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 * @param[in] betas FFT constants
 */
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
    uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t gammas[PARAM_M - 2] = {0};
    uint16_t deltas[PARAM_M - 2] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
    uint16_t u[1 << (PARAM_M - 2)] = {0};
    uint16_t v[1 << (PARAM_M - 2)] = {0};
    uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

    uint16_t beta_m_pow;
    uint16_t beta_m_inv;
    size_t i, j, k;
    size_t x;

    // Step 1: f has two coefficients, evaluate f[0] + f[1].x directly
    if (m_f == 1) {
        for (i = 0; i < m; ++i) {
            tmp[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], f[1]);
        }

        // Build the 2^m values by doubling: each beta adds tmp[j] to all values so far
        w[0] = f[0];
        x = 1;
        for (j = 0; j < m; ++j) {
            for (k = 0; k < x; ++k) {
                w[x + k] = w[k] ^ tmp[j];
            }
            x <<= 1;
        }

        return;
    }

    // Step 2: compute g, i.e. scale coefficient i of f by betas[m-1]^i
    if (betas[m - 1] != 1) {
        beta_m_pow = 1;
        x = 1;
        x <<= m_f;
        for (i = 1; i < x; ++i) {
            beta_m_pow = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
            f[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, f[i]);
        }
    }

    // Step 3
    radix(f0, f1, f, m_f);

    // Step 4: compute gammas and deltas
    // The inverse of the last beta is the same for every i, so compute it once.
    beta_m_inv = PQCLEAN_HQCRMRS128_AVX2_gf_inverse(betas[m - 1]);
    for (i = 0; i + 1 < m; ++i) {
        gammas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], beta_m_inv);
        deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(gammas[i]) ^ gammas[i];
    }

    // Compute gammas sums
    compute_subset_sums(gammas_sums, gammas, m - 1);

    // Step 5
    fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

    k = 1;
    k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
    if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
        w[0] = u[0];
        w[k] = u[0] ^ f1[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], f1[0]);
            w[k + i] = w[i] ^ f1[0];
        }
    } else {
        fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

        // Step 6: k uint16_t entries are 2 * k bytes
        memcpy(w + k, v, 2 * k);
        w[0] = u[0];
        w[k] ^= u[0];
        for (i = 1; i < k; ++i) {
            w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], v[i]);
            w[k + i] ^= w[i];
        }
    }
}



/**
 * @brief Evaluates f on all field elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * On this top-level call the basis is the one produced by compute_fft_betas,
 * so the subset sums used in the final combination are those of the betas
 * (the element 1 excluded). <br>
 * f itself is only read; the two halves produced by the radix conversion are
 * local copies that the recursive calls may modify.
 *
 * @param[out] w Array receiving 2^PARAM_M evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t betas[PARAM_M - 1] = {0};
    uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t f_lo[1 << (PARAM_FFT - 1)] = {0};
    uint16_t f_hi[1 << (PARAM_FFT - 1)] = {0};
    uint16_t deltas[PARAM_M - 1] = {0};
    uint16_t u[1 << (PARAM_M - 1)] = {0};
    uint16_t v[1 << (PARAM_M - 1)] = {0};

    const size_t half = (size_t) 1 << (PARAM_M - 1);
    size_t idx;

    // Basis and its subset sums (Step 1 and Step 2 of Gao-Mateer need no work here:
    // PARAM_FFT > 1 and the last basis element is 1)
    compute_fft_betas(betas);
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    // Step 3: radix conversion of f into two half-size polynomials
    radix(f_lo, f_hi, f, PARAM_FFT);

    // Step 4: deltas[i] = betas[i]^2 + betas[i]
    for (idx = 0; idx < PARAM_M - 1; ++idx) {
        deltas[idx] = PQCLEAN_HQCRMRS128_AVX2_gf_square(betas[idx]) ^ betas[idx];
    }

    // Step 5: evaluate both halves on the subset sums of the deltas
    fft_rec(u, f_lo, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, f_hi, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    // Step 6 and 7: combine u and v into the evaluations of f.
    // Index 0 is the evaluation at 0, index `half` the evaluation at 1.
    w[0] = u[0];
    w[half] = v[0] ^ u[0];

    // Remaining points: w[i] = u[i] + betas_sums[i].v[i], and w[half + i] = w[i] + v[i]
    for (idx = 1; idx < half; ++idx) {
        const uint16_t low = u[idx] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas_sums[idx], v[idx]);

        w[idx] = low;
        w[half + idx] = v[idx] ^ low;
    }
}



/**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * For every evaluation point, the matching entry of error is toggled (XORed
 * with 1) when the evaluation stored in w is zero. The entry index for a
 * non-zero point x is PARAM_GF_MUL_ORDER - gf_log[x]; the points 0 and 1 both
 * map to error[0].
 *
 * @param[in,out] error Array with the error; entries are XOR-updated
 * @param[in] w Array of size 2^PARAM_M
 */
void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    const size_t half = (size_t) 1 << (PARAM_M - 1);

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    // (uint16_t)(-x) >> 15 is 1 for a non-zero field element x and 0 for x == 0,
    // so "1 ^ ..." is a branch-free "x == 0" flag.
    error[0] ^= 1 ^ ((uint16_t) (-w[0]) >> 15);
    error[0] ^= 1 ^ ((uint16_t) (-w[half]) >> 15);

    for (size_t pt = 1; pt < half; ++pt) {
        const uint16_t point = gammas_sums[pt];
        const size_t pos_lo = PARAM_GF_MUL_ORDER - gf_log[point];
        const size_t pos_hi = PARAM_GF_MUL_ORDER - gf_log[point ^ 1];

        error[pos_lo] ^= 1 ^ ((uint16_t) (-w[pt]) >> 15);
        error[pos_hi] ^= 1 ^ ((uint16_t) (-w[half + pt]) >> 15);
    }
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-128/avx2/fft.h Целия файл

@@ -0,0 +1,18 @@
#ifndef FFT_H
#define FFT_H


/**
* @file fft.h
* Header file of fft.c
*/

#include <stddef.h>
#include <stdint.h>

void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


#endif

+ 176
- 0
src/kem/hqc/hqc-rmrs-128/avx2/gf.c Целия файл

@@ -0,0 +1,176 @@
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
/**
* @file gf.c
* Galois field implementation with multiplication using the pclmulqdq instruction
*/


static uint16_t gf_reduce(uint64_t x, size_t deg_x);



/**
* Reduces polynomial x modulo primitive polynomial GF_POLY.
*
* Each reduction step splits x at bit PARAM_M, keeps the low PARAM_M bits and
* folds the high part back in once for every non-leading term of PARAM_GF_POLY.
* The result is truncated to 16 bits on return.
*
* @returns x mod GF_POLY
* @param[in] x Polynomial of degree less than 64
* @param[in] deg_x The degree of polynomial x
*/
static uint16_t gf_reduce(uint64_t x, size_t deg_x) {
uint16_t z1, z2, rmdr, dist;
uint64_t mod;
size_t steps, i, j;

// Deduce the number of steps of reduction
steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2);

// Reduce
for (i = 0; i < steps; ++i) {
// mod holds the bits at positions >= PARAM_M; x keeps the low PARAM_M bits.
mod = x >> PARAM_M;
x &= (1 << PARAM_M) - 1;
// Contribution of the constant term (bit 0) of the field polynomial.
x ^= mod;

z1 = 0;
// Remaining terms of the polynomial with bit 0 cleared.
rmdr = PARAM_GF_POLY ^ 1;
for (j = PARAM_GF_POLY_WT - 2; j; --j) {
// z2 is the position of the lowest term still set in rmdr;
// shift mod by the gap to the previous term so it lines up with that term.
z2 = __tzcnt_u16(rmdr);
dist = (uint16_t) (z2 - z1);
mod <<= dist;
x ^= mod;
// Clear the term just handled and remember its position.
rmdr ^= 1 << z2;
z1 = z2;
}
}

return x;
}



/**
 * Multiplies two elements of GF(2^GF_M).
 *
 * The operands are multiplied as binary polynomials with a carry-less
 * multiplication and the product is then reduced with gf_reduce.
 *
 * @returns the product a*b
 * @param[in] a Element of GF(2^GF_M)
 * @param[in] b Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b) {
    const __m128i product = _mm_clmulepi64_si128(_mm_cvtsi32_si128(a), _mm_cvtsi32_si128(b), 0);
    const uint32_t unreduced = (uint32_t) _mm_cvtsi128_si32(product);

    return gf_reduce(unreduced, 2 * (PARAM_M - 1));
}



/**
* Compute 16 products in GF(2^GF_M).
*
* The 256-bit inputs are split into two 128-bit halves. For each half the
* unreduced products are obtained with carry-less multiplications on masked
* copies of the operands, then packed back into 16-bit lanes with byte
* shuffles. A final loop reduces all 16 products at once using the red[] table.
*
* @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M)
* @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers
* @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer
*
*/
__m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b) {
// Lower (index 0) and upper (index 1) 128-bit halves of both operands.
__m128i al = _mm256_extractf128_si256(a, 0);
__m128i ah = _mm256_extractf128_si256(a, 1);
__m128i bl = _mm256_extractf128_si256(b, 0);
__m128i bh = _mm256_extractf128_si256(b, 1);

// Lower half, low 64 bits (selector 0x0): products for the MASKL lanes, then the MASKH lanes.
__m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0);
abl0 &= CONST128_MIDDLEMASKL;
abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

// Lower half, high 64 bits (selector 0x11), same scheme.
__m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11);
abh0 &= CONST128_MIDDLEMASKL;
abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

// Pack the selected 16-bit results: abl0 into the low 8 bytes, abh0 into the high 8 bytes.
abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL);
abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH);

// Upper half of the inputs: identical sequence on ah/bh.
__m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0);
abl1 &= CONST128_MIDDLEMASKL;
abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

__m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11);
abh1 &= CONST128_MIDDLEMASKL;
abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL);
abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH);

// Reassemble the 256-bit vector: abl1 is the upper half, abl0 the lower half.
__m256i ret = _mm256_set_m128i(abl1, abl0);

// aux starts with bit 8 set in every 16-bit lane.
__m256i aux = CONST256_MR0;

// For bits 8..14 of each lane: where the bit is set, XOR in the matching red[] entry.
for (int32_t i = 0; i < 7; i++) {
ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux);
aux = aux << 1;
}

// Keep only the low 8 bits of every 16-bit lane.
ret &= CONST256_LASTMASK;
return ret;
}



/**
 * Squares an element of GF(2^GF_M).
 *
 * Squaring a binary polynomial moves bit i of the input to bit 2*i; the spread
 * value is then reduced with gf_reduce.
 *
 * @returns a^2
 * @param[in] a Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a) {
    uint32_t spread = 0;

    for (size_t bit = 0; bit < PARAM_M; ++bit) {
        spread |= (((uint32_t) a >> bit) & 1u) << (2 * bit);
    }

    return gf_reduce(spread, 2 * (PARAM_M - 1));
}



/**
 * Computes the inverse of an element of GF(2^8) as a^254,
 * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254
 * @returns the inverse of a
 * @param[in] a Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a) {
    const uint16_t a2 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a);
    const uint16_t a3 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a2, a);
    const uint16_t a4 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a2);
    const uint16_t a7 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a4, a3);
    const uint16_t a11 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a4, a7);
    const uint16_t a15 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a11, a4);
    const uint16_t a30 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a15);
    const uint16_t a60 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a30);
    const uint16_t a120 = PQCLEAN_HQCRMRS128_AVX2_gf_square(a60);
    const uint16_t a127 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(a120, a7);

    return PQCLEAN_HQCRMRS128_AVX2_gf_square(a127); /* a^254 */
}



/**
 * Returns i modulo 2^GF_M-1.
 * i must be less than 2*(2^GF_M-1).
 * Therefore, the return value is either i or i-2^GF_M+1.
 * The selection is done with a mask instead of a branch.
 * @returns i mod (2^GF_M-1)
 * @param[in] i The integer whose modulo is taken
 */
uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i) {
    const uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // borrow is 1 when the subtraction wrapped, i.e. when i < GF_MUL_ORDER
    const uint16_t borrow = (uint16_t) (reduced >> 15);

    // all-ones when borrow is 1, zero otherwise: adds the order back only after a wrap
    const uint16_t add_back = (uint16_t) (0u - borrow);

    return (uint16_t) (reduced + (add_back & PARAM_GF_MUL_ORDER));
}

+ 69
- 0
src/kem/hqc/hqc-rmrs-128/avx2/gf.h Целия файл

@@ -0,0 +1,69 @@
#ifndef GF_H
#define GF_H


/**
* @file gf.h
* Header file of gf.c
*/

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)

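/**
* Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
* The table wraps around: entries 255..257 repeat entries 0..2 (1, 2, 4), so an
* exponent slightly above the multiplicative order can be looked up without a reduction.
* NOTE(review): the original remark said the extra entries are needed by
* PQCLEAN_HQCRMRS128_AVX2_gf_mul, but the gf_mul in this AVX2 gf.c multiplies with
* pclmulqdq and does not read gf_exp -- confirm which callers rely on them.
*/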
static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



/**
* Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
* The logarithm of 0 is set to 0 by convention.
*/
static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };

/**
* Masks needed for the computation of 16 mult in GF(2^M)
* (used by PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect).
*
* CONST256_MR0          bit 8 set in every 16-bit lane; first bit tested by the reduction loop
* CONST256_LASTMASK     low 8 bits of every 16-bit lane; applied to the final result
* CONST128_MASKL/MASKH  even / odd 16-bit lanes of each 64-bit word
* CONST128_MIDDLEMASKL  bits 0..15 of each 64-bit word
* CONST128_MIDDLEMASKH  bits 32..47 of each 64-bit word
* CONST128_INDEXL/H     byte-shuffle controls selecting bytes 0,1,4,5,8,9,12,13 into the
*                       low (INDEXL) or high (INDEXH) 64 bits; the other half is 0xff bytes
*/
#define CONST256_MR0 _mm256_set1_epi64x((long long) 0x0100010001000100)
#define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff)
#define CONST128_MASKL _mm_set1_epi64x((long long) 0x0000ffff0000ffff)
#define CONST128_MASKH _mm_set1_epi64x((long long) 0xffff0000ffff0000)
#define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff)
#define CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000)
#define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff)
#define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100)

/**
* Reduction constants for the vectorised multiplication in GF(2^8).
* Entry i holds x^(8+i) modulo x^8+x^4+x^3+x^2+1 (0x1d for x^8, 0x3a for x^9, ..., 0x13 for x^14),
* replicated in every 16-bit lane of a 256-bit register.
*/
static const __m256i red[7] = {
{0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL},
{0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL},
{0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL},
{0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL},
{0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL},
{0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL},
{0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL},

};


uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mul(uint16_t a, uint16_t b);

__m256i PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(__m256i a, __m256i b);

uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_square(uint16_t a);

uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_inverse(uint16_t a);

uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_mod(uint16_t i);


#endif

+ 369
- 0
src/kem/hqc/hqc-rmrs-128/avx2/gf2x.c Целия файл

@@ -0,0 +1,369 @@
#include "gf2x.h"
#include "parameters.h"
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
* \file gf2x.c
* \brief AVX2 implementation of multiplication of two polynomials
*/



/* Number of 256-bit words needed to hold one ninth of a PARAM_N-bit polynomial (operand size used inside karat_three_way_mult) */
#define VEC_N_SPLIT_3x3 CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256)
/* Number of 256-bit words in one third of the padded polynomial (operand size used inside karat_mult9) */
#define VEC_N_SPLIT_3 (3*VEC_N_SPLIT_3x3)

static inline void reduce(uint64_t *o, const __m256i *a);
static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B);
static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B);


/**
* @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
*
* This function computes the modular reduction of the polynomial a(x):
* the part of a(x) starting at bit PARAM_N is shifted down by PARAM_N bits and XORed onto the low part.
*
* @param[out] o Pointer to the result; words 0 to (PARAM_N >> 6) are written
* @param[in] a256 Pointer to the polynomial a(x); it is also read as an array of 64-bit words
*/
static inline void reduce(uint64_t *o, const __m256i *a256) {
size_t i, i2;
__m256i r256, carry256;
__m256i *o256 = (__m256i *)o;
const uint64_t *a64 = (const uint64_t *)a256;
uint64_t r, carry;

// Vector part: four 64-bit words per iteration. i indexes the input words starting at the
// word that contains bit PARAM_N, i2 indexes the 256-bit output words.
i2 = 0;
for (i = (PARAM_N >> 6); i < (PARAM_N >> 5) - 4; i += 4) {
r256 = _mm256_lddqu_si256((const __m256i *) (& a64[i]));
r256 = _mm256_srli_epi64(r256, PARAM_N & 63);
// The bits shifted out of each word are recovered from the next word;
// (-PARAM_N) & 63 is the complementary shift count.
carry256 = _mm256_lddqu_si256((const __m256i *) (& a64[i + 1]));
carry256 = _mm256_slli_epi64(carry256, (-PARAM_N) & 63);
r256 ^= carry256;
_mm256_storeu_si256(&o256[i2], a256[i2] ^ r256);
i2 += 1;
}

// Scalar tail: turn i from an input word index into the index of the first output word
// the vector loop did not produce, then finish one 64-bit word at a time.
i = i - (PARAM_N >> 6);
for (; i < (PARAM_N >> 6) + 1; i++) {
r = a64[i + (PARAM_N >> 6)] >> (PARAM_N & 63);
carry = a64[i + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63);
r ^= carry;
o[i] = a64[i] ^ r;
}

// Keep only the bits selected by RED_MASK in the top word of the result
o[PARAM_N >> 6] &= RED_MASK;
}



/**
* @brief Compute C(x) = A(x)*B(x)
* A(x) and B(x) are stored in 128-bit registers
* This function computes A(x)*B(x) using Karatsuba
*
* Two 128-bit words are read from each of A and B and four 128-bit words are written to C.
* Each 128x128-bit partial product is itself built from three 64x64-bit carry-less multiplications.
*
* @param[out] C Pointer to the result
* @param[in] A Pointer to the polynomial A(x)
* @param[in] B Pointer to the polynomial B(x)
*/
static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) {
__m128i D1[2];
__m128i D0[2], D2[2];
__m128i Al = _mm_loadu_si128(A);
__m128i Ah = _mm_loadu_si128(A + 1);
__m128i Bl = _mm_loadu_si128(B);
__m128i Bh = _mm_loadu_si128(B + 1);

// Compute Al.Bl=D0
// DD0 = low halves multiplied, DD2 = high halves multiplied, DD1 = middle term;
// the 0x4e shuffle swaps the two 64-bit halves so the XOR yields (low + high) in the low half.
__m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0);
__m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11);
__m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e));
__m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e));
__m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

// Compute Ah.Bh=D2
DD0 = _mm_clmulepi64_si128(Ah, Bh, 0);
DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11);
AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e));
BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e));
DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

// Compute AlpAh.BlpBh=D1
// Initialisation of AlpAh and BlpBh
__m128i AlpAh = _mm_xor_si128(Al, Ah);
__m128i BlpBh = _mm_xor_si128(Bl, Bh);
DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0);
DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11);
AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e));
BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e));
DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

// Final computation of C: D0 and D2 are the outer words, the two middle words
// receive D1 + D0 + D2 overlapped with the inner words of D0 and D2
__m128i middle = _mm_xor_si128(D0[1], D2[0]);
C[0] = D0[0];
C[1] = middle ^ D0[0] ^ D1[0];
C[2] = middle ^ D1[1] ^ D2[1];
C[3] = D2[1];
}



/**
 * @brief Compute C(x) = A(x)*B(x) for operands of two 256-bit words
 *
 * One Karatsuba step: the low words, the high words and the sums of both
 * are multiplied with karat_mult_1 and the three partial products are recombined.
 *
 * @param[out] C Pointer to the result (four 256-bit words)
 * @param[in] A Pointer to the polynomial A(x) (two 256-bit words)
 * @param[in] B Pointer to the polynomial B(x) (two 256-bit words)
 */
static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[2], mid[2], high[2];
    __m256i sum_a = _mm256_xor_si256(A[0], A[1]);
    __m256i sum_b = _mm256_xor_si256(B[0], B[1]);

    // karat_mult_1 works on 128-bit words: one 256-bit word is one of its operands
    karat_mult_1((__m128i *) low, (const __m128i *) A, (const __m128i *) B);
    karat_mult_1((__m128i *) high, (const __m128i *) (A + 1), (const __m128i *) (B + 1));
    karat_mult_1((__m128i *) mid, (__m128i *) &sum_a, (__m128i *) &sum_b);

    // Words where the low and the high product overlap
    __m256i overlap = _mm256_xor_si256(low[1], high[0]);

    C[0] = low[0];
    C[1] = overlap ^ low[0] ^ mid[0];
    C[2] = overlap ^ mid[1] ^ high[1];
    C[3] = high[1];
}



/**
 * @brief Compute C(x) = A(x)*B(x) for operands of four 256-bit words
 *
 * One Karatsuba step on top of karat_mult_2: low halves, high halves and the
 * sums of both halves are multiplied, then the three products are recombined.
 *
 * @param[out] C Pointer to the result (eight 256-bit words)
 * @param[in] A Pointer to the polynomial A(x) (four 256-bit words)
 * @param[in] B Pointer to the polynomial B(x) (four 256-bit words)
 */
static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[4], mid[4], high[4];
    __m256i sum_a[2], sum_b[2];
    size_t k;

    for (k = 0; k < 2; k++) {
        sum_a[k] = A[k] ^ A[k + 2];
        sum_b[k] = B[k] ^ B[k + 2];
    }

    karat_mult_2(low, A, B);
    karat_mult_2(high, A + 2, B + 2);
    karat_mult_2(mid, sum_a, sum_b);

    for (k = 0; k < 2; k++) {
        // Word where the low and the high product overlap
        __m256i overlap = _mm256_xor_si256(low[k + 2], high[k]);

        C[k] = low[k];
        C[k + 2] = overlap ^ low[k] ^ mid[k];
        C[k + 4] = overlap ^ mid[k + 2] ^ high[k + 2];
        C[k + 6] = high[k + 2];
    }
}



/**
 * @brief Compute C(x) = A(x)*B(x) for operands of eight 256-bit words
 *
 * One Karatsuba step on top of karat_mult_4: low halves, high halves and the
 * sums of both halves are multiplied, then the three products are recombined.
 *
 * @param[out] C Pointer to the result (sixteen 256-bit words)
 * @param[in] A Pointer to the polynomial A(x) (eight 256-bit words)
 * @param[in] B Pointer to the polynomial B(x) (eight 256-bit words)
 */
static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[8], mid[8], high[8];
    __m256i sum_a[4], sum_b[4];

    karat_mult_4(low, A, B);
    karat_mult_4(high, A + 4, B + 4);

    for (size_t k = 0; k < 4; k++) {
        sum_a[k] = A[k] ^ A[k + 4];
        sum_b[k] = B[k] ^ B[k + 4];
    }

    karat_mult_4(mid, sum_a, sum_b);

    for (size_t k = 0; k < 4; k++) {
        // Word where the low and the high product overlap
        const __m256i overlap = _mm256_xor_si256(low[k + 4], high[k]);

        C[k] = low[k];
        C[k + 4] = overlap ^ low[k] ^ mid[k];
        C[k + 8] = overlap ^ mid[k + 4] ^ high[k + 4];
        C[k + 12] = high[k + 4];
    }
}



/**
* @brief Compute C(x) = A(x)*B(x)
*
* This function computes A(x)*B(x) using Karatsuba 3 part split
* A(x) and B(x) are stored in 256-bit registers
*
* Each operand is cut into three parts of VEC_N_SPLIT_3x3 words. Six products are formed with
* karat_mult_8 (the three parts and the three pairwise sums) and recombined into
* 6 * VEC_N_SPLIT_3x3 result words.
* NOTE: karat_mult_8 always consumes eight words per operand, so this routine relies on
* VEC_N_SPLIT_3x3 being 8.
*
* @param[out] C Pointer to the result
* @param[in] A Pointer to the polynomial A(x)
* @param[in] B Pointer to the polynomial B(x)
*/
static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B) {
size_t i, j;
const __m256i *a0, *b0, *a1, *b1, *a2, *b2;
__m256i aa01[VEC_N_SPLIT_3x3], bb01[VEC_N_SPLIT_3x3], aa02[VEC_N_SPLIT_3x3], bb02[VEC_N_SPLIT_3x3], aa12[VEC_N_SPLIT_3x3], bb12[VEC_N_SPLIT_3x3];
__m256i D0[2 * VEC_N_SPLIT_3x3], D1[2 * VEC_N_SPLIT_3x3], D2[2 * VEC_N_SPLIT_3x3], D3[2 * VEC_N_SPLIT_3x3], D4[2 * VEC_N_SPLIT_3x3], D5[2 * VEC_N_SPLIT_3x3];
__m256i ro256[6 * VEC_N_SPLIT_3x3];
__m256i middle0;

// The three parts of each operand
a0 = A;
a1 = A + VEC_N_SPLIT_3x3;
a2 = A + (VEC_N_SPLIT_3x3 << 1);

b0 = B;
b1 = B + VEC_N_SPLIT_3x3;
b2 = B + (VEC_N_SPLIT_3x3 << 1);

// Pairwise sums of the parts
for (i = 0; i < VEC_N_SPLIT_3x3; i++) {
aa01[i] = a0[i] ^ a1[i];
bb01[i] = b0[i] ^ b1[i];

aa12[i] = a2[i] ^ a1[i];
bb12[i] = b2[i] ^ b1[i];

aa02[i] = a0[i] ^ a2[i];
bb02[i] = b0[i] ^ b2[i];
}

// Products of the parts
karat_mult_8(D0, a0, b0);
karat_mult_8(D1, a1, b1);
karat_mult_8(D2, a2, b2);

// Products of the pairwise sums
karat_mult_8(D3, aa01, bb01);
karat_mult_8(D4, aa02, bb02);
karat_mult_8(D5, aa12, bb12);

// Recombination: i addresses the low half and j the high half of each product
for (i = 0; i < VEC_N_SPLIT_3x3; i++) {
j = i + VEC_N_SPLIT_3x3;
middle0 = D0[i] ^ D1[i] ^ D0[j];
ro256[i] = D0[i];
ro256[j] = D3[i] ^ middle0;
ro256[j + VEC_N_SPLIT_3x3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0;
middle0 = D1[j] ^ D2[i] ^ D2[j];
ro256[j + (VEC_N_SPLIT_3x3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0;
ro256[i + (VEC_N_SPLIT_3x3 << 2)] = D5[j] ^ middle0;
ro256[j + (VEC_N_SPLIT_3x3 << 2)] = D2[j];
}

// Copy the staged result; 2 * VEC_N_SPLIT_3 equals 6 * VEC_N_SPLIT_3x3, the size of ro256
for (i = 0; i < 2 * VEC_N_SPLIT_3; i++) {
C[i] = ro256[i];
}
}



/**
* @brief Compute C(x) = A(x)*B(x)
*
* This function computes A(x)*B(x) using Karatsuba 3 part split
* A(x) and B(x) are stored in 256-bit registers
*
* Outer level of the multiplication: each operand is cut into three parts of VEC_N_SPLIT_3 words,
* six products are formed with karat_three_way_mult (the three parts and the three pairwise sums)
* and recombined directly into C. 6 * VEC_N_SPLIT_3 words of C are written.
*
* @param[out] C Pointer to the result
* @param[in] A Pointer to the polynomial A(x)
* @param[in] B Pointer to the polynomial B(x)
*/
static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B) {
size_t i, j;
const __m256i *a0, *b0, *a1, *b1, *a2, *b2;
__m256i aa01[VEC_N_SPLIT_3], bb01[VEC_N_SPLIT_3], aa02[VEC_N_SPLIT_3], bb02[VEC_N_SPLIT_3], aa12[VEC_N_SPLIT_3], bb12[VEC_N_SPLIT_3];
__m256i D0[2 * VEC_N_SPLIT_3], D1[2 * VEC_N_SPLIT_3], D2[2 * VEC_N_SPLIT_3], D3[2 * VEC_N_SPLIT_3], D4[2 * VEC_N_SPLIT_3], D5[2 * VEC_N_SPLIT_3];
__m256i middle0;

// The 64-bit word arrays are read as 256-bit words; the three parts of each operand
a0 = (__m256i *)(A->arr64);
a1 = a0 + VEC_N_SPLIT_3;
a2 = a0 + (2 * VEC_N_SPLIT_3);

b0 = (__m256i *)(B->arr64);
b1 = b0 + VEC_N_SPLIT_3;
b2 = b0 + (2 * VEC_N_SPLIT_3);

// Pairwise sums of the parts
for (i = 0; i < VEC_N_SPLIT_3; i++) {
aa01[i] = a0[i] ^ a1[i];
bb01[i] = b0[i] ^ b1[i];

aa12[i] = a2[i] ^ a1[i];
bb12[i] = b2[i] ^ b1[i];

aa02[i] = a0[i] ^ a2[i];
bb02[i] = b0[i] ^ b2[i];
}

// Products of the parts
karat_three_way_mult(D0, a0, b0);
karat_three_way_mult(D1, a1, b1);
karat_three_way_mult(D2, a2, b2);

// Products of the pairwise sums
karat_three_way_mult(D3, aa01, bb01);
karat_three_way_mult(D4, aa02, bb02);
karat_three_way_mult(D5, aa12, bb12);

// Recombination: i addresses the low half and j the high half of each product
for (i = 0; i < VEC_N_SPLIT_3; i++) {
j = i + VEC_N_SPLIT_3;
middle0 = D0[i] ^ D1[i] ^ D0[j];
C[i] = D0[i];
C[j] = D3[i] ^ middle0;
C[j + VEC_N_SPLIT_3] = D4[i] ^ D2[i] ^ D3[j] ^ D1[j] ^ middle0;
middle0 = D1[j] ^ D2[i] ^ D2[j];
C[j + (VEC_N_SPLIT_3 << 1)] = D5[i] ^ D4[j] ^ D0[j] ^ D1[i] ^ middle0;
C[i + (VEC_N_SPLIT_3 << 2)] = D5[j] ^ middle0;
C[j + (VEC_N_SPLIT_3 << 2)] = D2[j];
}
}



/**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * This function multiplies the polynomials <b>a1</b> and <b>a2</b> with the
 * Karatsuba routine karat_mult9 and reduces the product modulo \f$ X^n - 1\f$.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to a polynomial
 * @param[in] a2 Pointer to a polynomial
 */
void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) {
    // PARAM_N_MULT is a size in bits, the scratch buffer is counted in 256-bit words.
    // karat_mult9 writes 6 * VEC_N_SPLIT_3 = 2 * PARAM_N_MULT / 256 words and reduce()
    // reads no further, so sizing the array in bits would only waste stack.
    __m256i a1_times_a2[CEIL_DIVIDE(2 * PARAM_N_MULT + 1, 256)] = {0};

    karat_mult9(a1_times_a2, a1, a2);
    reduce(o, a1_times_a2);
}

+ 21
- 0
src/kem/hqc/hqc-rmrs-128/avx2/gf2x.h Целия файл

@@ -0,0 +1,21 @@
#ifndef GF2X_H
#define GF2X_H


/**
* @file gf2x.h
* @brief Header file for gf2x.c
*/
#include "parameters.h"
#include <immintrin.h>
#include <stdint.h>

/*
 * A polynomial stored as VEC_N_256_SIZE_64 64-bit words.
 * The __m256i member is not meant to be accessed: being a member of the union,
 * it gives the whole object the alignment of a 256-bit vector, which is what
 * lets the AVX2 code read arr64 through a __m256i pointer.
 */
typedef union {
uint64_t arr64[VEC_N_256_SIZE_64];
__m256i dummy;
} aligned_vec_t;

void PQCLEAN_HQCRMRS128_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2);


#endif

+ 168
- 0
src/kem/hqc/hqc-rmrs-128/avx2/hqc.c Целия файл

@@ -0,0 +1,168 @@
#include "code.h"
#include "gf2x.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
/**
* @file hqc.c
* @brief Implementation of hqc.h
*/



/**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * Two random seeds are drawn. The secret-key seed is expanded into the fixed-weight
 * vectors <b>x</b> and <b>y</b>; the public-key seed is expanded into the vector <b>h</b>.
 * The syndrome <b>s</b> = x + y.h is then computed.
 *
 * The public key is the public-key seed followed by <b>s</b>.
 * The secret key is the secret-key seed followed by the public key (appended to respect the NIST API).
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct sk_expander;
    AES_XOF_struct pk_expander;
    uint8_t seed_sk[SEED_BYTES] = {0};
    uint8_t seed_pk[SEED_BYTES] = {0};
    aligned_vec_t vec_x = {0};
    aligned_vec_t vec_y = {0};
    aligned_vec_t vec_h = {0};
    aligned_vec_t vec_s = {0};
    aligned_vec_t vec_yh = {0};

    // One seed expander per key; bytes 32.. of each seed are passed as the expander's second input
    randombytes(seed_sk, SEED_BYTES);
    seedexpander_init(&sk_expander, seed_sk, seed_sk + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(seed_pk, SEED_BYTES);
    seedexpander_init(&pk_expander, seed_pk, seed_pk + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret key: x first, then y, both drawn from the same expander
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_expander, vec_x.arr64, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&sk_expander, vec_y.arr64, PARAM_OMEGA);

    // Public key: s = x + y.h
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&pk_expander, vec_h.arr64);
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(vec_yh.arr64, &vec_y, &vec_h);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(vec_s.arr64, vec_x.arr64, vec_yh.arr64, VEC_N_256_SIZE_64);

    // Serialise both keys; the secret key embeds the freshly written public key
    PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(pk, seed_pk, vec_s.arr64);
    PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(sk, seed_sk, pk);
}



/**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of the vectors <b>u</b> = r1 + r2.h and
 * <b>v</b> = m.G + s.r2 + e, where r1, r2 and e are derived from <b>theta</b>.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_expander;
    aligned_vec_t vec_h = {0};
    aligned_vec_t vec_s = {0};
    aligned_vec_t vec_r1 = {0};
    aligned_vec_t vec_r2 = {0};
    aligned_vec_t vec_e = {0};
    aligned_vec_t buf_a = {0};
    aligned_vec_t buf_b = {0};
    aligned_vec_t buf_c = {0};

    // All encryption randomness comes from theta
    seedexpander_init(&theta_expander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // h and s come from the public key
    PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(vec_h.arr64, vec_s.arr64, pk);

    // r1, r2 and e are drawn in this order from the same expander
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&theta_expander, vec_r1.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&theta_expander, vec_r2.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&theta_expander, vec_e.arr64, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(buf_a.arr64, &vec_r2, &vec_h);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(u, vec_r1.arr64, buf_a.arr64, VEC_N_256_SIZE_64);

    // v = m.G: the codeword is produced as bytes inside v, converted in place to
    // 64-bit words, then resized from PARAM_N1N2 to PARAM_N bits into buf_a
    PQCLEAN_HQCRMRS128_AVX2_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(buf_a.arr64, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, resized back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(buf_b.arr64, &vec_r2, &vec_s);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(buf_c.arr64, vec_e.arr64, buf_b.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(buf_b.arr64, buf_a.arr64, buf_c.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(v, PARAM_N1N2, buf_b.arr64, PARAM_N);
}



/**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Recomputes the secret vectors from the secret key, forms v - u.y and decodes it.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    aligned_vec_t vec_x = {0};
    aligned_vec_t vec_y = {0};
    aligned_vec_t buf_a = {0};
    aligned_vec_t buf_b = {0};
    aligned_vec_t buf_c = {0};

    // x, y and pk are all recovered from the secret key string
    PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(vec_x.arr64, vec_y.arr64, pk, sk);

    // v is widened to PARAM_N bits; u is copied into an aligned buffer for the multiplication
    PQCLEAN_HQCRMRS128_AVX2_vect_resize(buf_a.arr64, PARAM_N, v, PARAM_N1N2);
    memcpy(buf_b.arr64, u, VEC_N_256_SIZE_64 * sizeof(uint64_t));

    // v - u.y (addition and subtraction coincide here)
    PQCLEAN_HQCRMRS128_AVX2_vect_mul(buf_c.arr64, &vec_y, &buf_b);
    PQCLEAN_HQCRMRS128_AVX2_vect_add(buf_b.arr64, buf_a.arr64, buf_c.arr64, VEC_N_256_SIZE_64);

    // The decoder works on bytes: serialise v - u.y into buf_a and decode from there
    PQCLEAN_HQCRMRS128_AVX2_store8_arr((uint8_t *)buf_a.arr64, VEC_N_SIZE_BYTES, buf_b.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_code_decode(m, (uint8_t *)buf_a.arr64);
}

+ 19
- 0
src/kem/hqc/hqc-rmrs-128/avx2/hqc.h Целия файл

@@ -0,0 +1,19 @@
#ifndef HQC_H
#define HQC_H


/**
* @file hqc.h
* @brief Functions of the HQC_PKE IND_CPA scheme
*/

#include <stdint.h>

void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

void PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


#endif

+ 140
- 0
src/kem/hqc/hqc-rmrs-128/avx2/kem.c Целия файл

@@ -0,0 +1,140 @@
#include "api.h"
#include "fips202.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "sha2.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file kem.c
* @brief Implementation of api.h
*/



/**
* @brief Keygen of the HQC_KEM IND-CCA2 scheme
*
* The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
*
* The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
* As a technicality, the public key is appended to the secret key in order to respect NIST API.
*
* This is a direct wrapper around the PKE key generation.
*
* @param[out] pk String containing the public key
* @param[out] sk String containing the secret key
* @returns 0 if keygen is successful
*/
int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

PQCLEAN_HQCRMRS128_AVX2_hqc_pke_keygen(pk, sk);
return 0;
}



/**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is drawn, theta = SHA3-512(m) seeds the encryption of m,
 * d = SHA-512(m) is appended to the ciphertext and the shared secret is
 * SHA-512(m || u || v).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t theta[SHA512_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    // Automatic storage like every other buffer here: a static buffer would be shared
    // between concurrent callers and would keep ciphertext data alive after the call
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Computing m
    randombytes(m, VEC_K_SIZE_BYTES);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u, v, m, theta, pk);

    // Computing d
    sha512(d, m, VEC_K_SIZE_BYTES);

    // Computing shared secret from m || u || v
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Computing ciphertext
    PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(ct, u, v, d);

    return 0;
}



/**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted, the recovered message is re-encrypted and the result
 * is compared with the received ciphertext. On any mismatch the shared secret is
 * zeroed and -1 is returned.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
int PQCLEAN_HQCRMRS128_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    uint8_t result;
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_256_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQCRMRS128_AVX2_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    result = PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_AVX2_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_AVX2_vect_compare(d, d2, SHA512_BYTES);

    // Branch-free expansion of "any mismatch" into a byte mask: 0x00 if result == 0, 0xFF otherwise.
    // Only unsigned arithmetic is used, because right-shifting a negative signed value
    // is implementation-defined in C.
    result = (uint8_t) (0U - (((uint32_t) 0 - (uint32_t) result) >> 31));
    for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) {
        ss[i] &= ~result;
    }

    return -(result & 1);
}

+ 111
- 0
src/kem/hqc/hqc-rmrs-128/avx2/parameters.h Целия файл

@@ -0,0 +1,111 @@
#ifndef HQC_PARAMETERS_H
#define HQC_PARAMETERS_H


/**
* @file parameters.h
* @brief Parameters of the HQC_KEM IND-CCA2 scheme
*/
#include "api.h"


#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/

/*
#define PARAM_N Define the parameter n of the scheme
#define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code)
#define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
#define PARAM_N1N2 Define the length in bits of the Concatenated code
#define PARAM_OMEGA Define the parameter omega of the scheme
#define PARAM_OMEGA_E Define the parameter omega_e of the scheme
#define PARAM_OMEGA_R Define the parameter omega_r of the scheme
#define PARAM_SECURITY Define the security level corresponding to the chosen parameters
#define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters

#define SECRET_KEY_BYTES Define the size of the secret key in bytes
#define PUBLIC_KEY_BYTES Define the size of the public key in bytes
#define SHARED_SECRET_BYTES Define the size of the shared secret in bytes
#define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes

#define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
#define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes
#define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes
#define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes
#define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

#define VEC_N_SIZE_64 Define the size of the array used to store a PARAM_N sized vector in 64 bits
#define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits
#define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
#define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

#define VEC_N_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
#define VEC_N1N2_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

#define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
#define PARAM_M Define a positive integer
#define PARAM_GF_POLY Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
#define PARAM_GF_POLY_WT Hamming weight of PARAM_GF_POLY
#define PARAM_GF_POLY_M2 Distance between the primitive polynomial first two set bits
#define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1
#define PARAM_K Define the size of the information bits of the Reed-Solomon code
#define PARAM_G Define the size of the generator polynomial of Reed-Solomon code
#define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input
We use the FFT to compute the roots of sigma, whose degree if PARAM_DELTA=24
The smallest power of 2 greater than 24+1 is 32=2^5
#define RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code

#define RED_MASK A mask for the higher bits of a vector
#define SHA512_BYTES Define the size of SHA512 output in bytes
#define SEED_BYTES Define the size of the seed in bytes
#define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length
*/

#define PARAM_N 17669
#define PARAM_N1 46
#define PARAM_N2 384
#define PARAM_N1N2 17664
#define PARAM_OMEGA 66
#define PARAM_OMEGA_E 75
#define PARAM_OMEGA_R 75
#define PARAM_SECURITY 128
#define PARAM_DFR_EXP 128

#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS128_AVX2_CRYPTO_SECRETKEYBYTES
#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS128_AVX2_CRYPTO_PUBLICKEYBYTES
#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS128_AVX2_CRYPTO_BYTES
#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS128_AVX2_CRYPTO_CIPHERTEXTBYTES

#define UTILS_REJECTION_THRESHOLD 16767881
#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8)
#define VEC_K_SIZE_BYTES PARAM_K
#define VEC_N1_SIZE_BYTES PARAM_N1
#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8)

#define VEC_N_SIZE_256 CEIL_DIVIDE(PARAM_N, 256)

#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64)
#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8)
#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8)
#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64)

#define PARAM_N_MULT (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256))
#define VEC_N_256_SIZE_64 (PARAM_N_MULT / 64)
#define VEC_N1N2_256_SIZE_64 (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

#define PARAM_DELTA 15
#define PARAM_M 8
#define PARAM_GF_POLY 0x11D
#define PARAM_GF_POLY_WT 5
#define PARAM_GF_POLY_M2 4
#define PARAM_GF_MUL_ORDER 255
#define PARAM_K 16
#define PARAM_G 31
#define PARAM_FFT 5
#define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1

#define RED_MASK 0x1f
#define SHA512_BYTES 64
#define SEED_BYTES 40
#define SEEDEXPANDER_MAX_LENGTH 4294967295

#endif

+ 186
- 0
src/kem/hqc/hqc-rmrs-128/avx2/parsing.c Целия файл

@@ -0,0 +1,186 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file parsing.c
* @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
*/


/**
 * @brief Store a 64-bit integer as 8 bytes, least significant byte first
 *
 * @param[out] out Destination, at least 8 bytes
 * @param[in] in Value to store
 */
void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in) {
    for (size_t k = 0; k < 8; k++) {
        out[k] = (unsigned char) (in & 0xFF);
        in >>= 8;
    }
}


/**
 * @brief Load 8 bytes as a 64-bit integer, least significant byte first
 *
 * @param[in] in Source, at least 8 bytes
 * @returns the decoded value
 */
uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in) {
    uint64_t ret = 0;

    for (size_t k = 8; k-- > 0;) {
        ret = (ret << 8) | in[k];
    }

    return ret;
}

/**
 * @brief Convert a byte string into an array of 64-bit words (least significant byte first)
 *
 * Complete groups of 8 bytes are converted first. If 1 to 7 bytes remain and there is
 * still room in the output, they are packed into the low bytes of one more word.
 * Output words beyond that are left untouched.
 *
 * The conversion may be done in place (out64 and in8 designating the same buffer):
 * every word is fully assembled from the input before it is stored.
 *
 * @param[out] out64 Destination array
 * @param[in] outlen Capacity of out64 in 64-bit words
 * @param[in] in8 Source bytes
 * @param[in] inlen Number of source bytes
 */
void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t index_in = 0;
    size_t index_out = 0;

    // Complete 8-byte groups
    while (index_out < outlen && index_in + 8 <= inlen) {
        out64[index_out] = PQCLEAN_HQCRMRS128_AVX2_load8(in8 + index_in);

        index_in += 8;
        index_out += 1;
    }

    // Nothing left to read, or no room left to write
    if (index_in >= inlen || index_out >= outlen) {
        return;
    }

    // Between 1 and 7 trailing bytes. They are gathered in a local variable so that the
    // single store below cannot overwrite input bytes that are still to be read.
    uint64_t tail = 0;
    for (size_t k = inlen - index_in; k-- > 0;) {
        tail = (tail << 8) | in8[index_in + k];
    }
    out64[index_out] = tail;
}

/**
 * @brief Convert an array of 64-bit words into a byte string (least significant byte first)
 *
 * Bytes are emitted until either outlen bytes have been written or inlen words
 * have been consumed, whichever comes first.
 *
 * @param[out] out8 Destination bytes
 * @param[in] outlen Capacity of out8 in bytes
 * @param[in] in64 Source words
 * @param[in] inlen Number of source words
 */
void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    size_t written = 0;
    size_t word = 0;

    while (written < outlen && word < inlen) {
        const unsigned int shift = (unsigned int) (written & 7) * 8;

        out8[written] = (uint8_t) ((in64[word] >> shift) & 0xFF);
        written++;

        // Eight bytes emitted from the current word: move to the next one
        if ((written & 7) == 0) {
            word++;
        }
    }
}


/**
 * @brief Serialise a secret key
 *
 * The secret key string is the seed used to generate the vectors <b>x</b> and <b>y</b>,
 * immediately followed by the public key (appended to respect the NIST API).
 *
 * @param[out] sk String containing the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *pk_part = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_part, pk, PUBLIC_KEY_BYTES);
}

/**
 * @brief Parse a secret key string
 *
 * The leading seed is copied out and expanded into the fixed-weight vectors
 * <b>x</b> and <b>y</b> (in this order); the public key that follows the seed
 * is copied into <b>pk</b>.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint64_t representation of vector y
 * @param[out] pk String containing the public key
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) {
    AES_XOF_struct expander;
    uint8_t seed[SEED_BYTES] = {0};

    memcpy(seed, sk, SEED_BYTES);
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Bytes 32.. of the seed are passed as the expander's second input
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(&expander, y, PARAM_OMEGA);
}

/**
 * @brief Serialize a public key
 *
 * The serialized public key is the seed used to generate the vector <b>h</b>,
 * followed by the syndrome <b>s</b> stored as VEC_N_SIZE_BYTES bytes.
 *
 * @param[out] pk String receiving SEED_BYTES + VEC_N_SIZE_BYTES bytes
 * @param[in] pk_seed Seed (SEED_BYTES bytes) used to generate the public key
 * @param[in] s uint64_t representation of vector s (VEC_N_SIZE_64 words)
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *const s_part = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(s_part, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
}



/**
 * @brief Deserialize a public key
 *
 * The serialized public key is a seed of SEED_BYTES bytes followed by the
 * syndrome <b>s</b>. The syndrome is loaded into 64-bit words and the seed
 * initialises a seed expander from which the vector <b>h</b> is drawn.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s (VEC_N_SIZE_64 words)
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    uint8_t seed_copy[SEED_BYTES] = {0};
    AES_XOF_struct expander;
    const uint8_t *const s_part = pk + SEED_BYTES;

    memcpy(seed_copy, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(s, VEC_N_SIZE_64, s_part, VEC_N_SIZE_BYTES);

    // the seed is split at byte 32 into the two inputs of seedexpander_init
    // NOTE(review): presumably 32 seed bytes followed by the diversifier; confirm against seedexpander_init
    seedexpander_init(&expander, seed_copy, &seed_copy[32], SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_AVX2_vect_set_random(&expander, h);
}


/**
 * @brief Serialize a ciphertext
 *
 * The serialized ciphertext is the concatenation of vector <b>u</b>
 * (VEC_N_SIZE_BYTES bytes), vector <b>v</b> (VEC_N1N2_SIZE_BYTES bytes)
 * and hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] ct String receiving the ciphertext
 * @param[in] u uint64_t representation of vector u (VEC_N_SIZE_64 words)
 * @param[in] v uint64_t representation of vector v (VEC_N1N2_SIZE_64 words)
 * @param[in] d String containing the hash d
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *const v_part = ct + VEC_N_SIZE_BYTES;
    uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_AVX2_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_AVX2_store8_arr(v_part, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_part, d, SHA512_BYTES);
}


/**
 * @brief Deserialize a ciphertext
 *
 * The serialized ciphertext is the concatenation of vector <b>u</b>
 * (VEC_N_SIZE_BYTES bytes), vector <b>v</b> (VEC_N1N2_SIZE_BYTES bytes)
 * and hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] u uint64_t representation of vector u (VEC_N_SIZE_64 words)
 * @param[out] v uint64_t representation of vector v (VEC_N1N2_SIZE_64 words)
 * @param[out] d String receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *const v_part = ct + VEC_N_SIZE_BYTES;
    const uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_AVX2_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, v_part, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_part, SHA512_BYTES);
}

+ 36
- 0
src/kem/hqc/hqc-rmrs-128/avx2/parsing.h Целия файл

@@ -0,0 +1,36 @@
#ifndef PARSING_H
#define PARSING_H


/**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

/* <stddef.h> is needed for size_t, which <stdint.h> is not required to provide */
#include <stddef.h>
#include <stdint.h>

/** Write the 64-bit word in to the byte buffer out (byte order as implemented in parsing.c). */
void PQCLEAN_HQCRMRS128_AVX2_store8(unsigned char *out, uint64_t in);

/** Read one 64-bit word from the byte buffer in (byte order as implemented in parsing.c). */
uint64_t PQCLEAN_HQCRMRS128_AVX2_load8(const unsigned char *in);

/** Load up to inlen bytes from in8 into at most outlen 64-bit words of out64. */
void PQCLEAN_HQCRMRS128_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

/** Store up to inlen 64-bit words from in64 into at most outlen bytes of out8, low byte first. */
void PQCLEAN_HQCRMRS128_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


/** Serialize a secret key as sk_seed followed by the serialized public key pk. */
void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

/** Recover the vectors x and y and the public key pk from the serialized secret key sk. */
void PQCLEAN_HQCRMRS128_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk);


/** Serialize a public key as pk_seed followed by the bytes of vector s. */
void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

/** Recover the vectors h and s from the serialized public key pk. */
void PQCLEAN_HQCRMRS128_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


/** Serialize a ciphertext as the bytes of u, the bytes of v, then the hash d. */
void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

/** Split the serialized ciphertext ct back into the vectors u and v and the hash d. */
void PQCLEAN_HQCRMRS128_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


#endif

+ 389
- 0
src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.c Целия файл

@@ -0,0 +1,389 @@
#include "parameters.h"
#include "reed_muller.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
/**
* @file reed_muller.c
* Constant time implementation of Reed-Muller code RM(1,7)
*/


// number of repeated code words
#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128)

// copy bit 0 into all bits of a 64 bit value: yields 0 or -1 as an int64_t.
// The bit is widened to int64_t BEFORE it is negated, so the result is all
// ones for any integer argument type (negating an unsigned 32-bit 1 first
// would only fill the low 32 bits). The expansion is fully parenthesized.
#define BIT0MASK(x) (-(int64_t)((x) & 1))

// Forward declarations of the file-local helpers used by the encode/decode entry points.
// encode: turns one message byte into one 16-byte codeword
static void encode(uint8_t *word, uint8_t message);
// expand_and_sum: adds the bits of MULTIPLICITY codeword copies into 8 vectors of 16-bit counters
static void expand_and_sum(__m256i *dst, const uint64_t *src);
// hadamard: seven butterfly passes alternating between the two buffers
static void hadamard(__m256i *src, __m256i *dst);
// find_peaks: locates the entry of largest magnitude and returns the decoded value
static uint32_t find_peaks(__m256i *transform);



/**
 * @brief Encode a single byte into a single codeword using RM(1,7)
 *
 * Encoding matrix of this code:
 * bit pattern (note that bits are numbered big endian)
 * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1 cccccccc cccccccc cccccccc cccccccc
 * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4 ffff0000 ffff0000 ffff0000 ffff0000
 * 5 00000000 ffffffff 00000000 ffffffff
 * 6 00000000 00000000 ffffffff ffffffff
 * 7 ffffffff ffffffff ffffffff ffffffff
 *
 * The 128-bit codeword is built as four 32-bit quarters, each stored
 * least significant byte first.
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message to encode
 */
static void encode(uint8_t *word, uint8_t message) {
    uint32_t quarter[4];
    uint32_t base;

    // bit 7 flips all the bits, do that first to save work
    base = BIT0MASK(message >> 7);
    // bits 0, 1, 2, 3, 4 contribute the same pattern to all four quarters
    // (Warning: in the bit matrix above, low bits are at the left!)
    base ^= BIT0MASK(message >> 0) & 0xaaaaaaaa;
    base ^= BIT0MASK(message >> 1) & 0xcccccccc;
    base ^= BIT0MASK(message >> 2) & 0xf0f0f0f0;
    base ^= BIT0MASK(message >> 3) & 0xff00ff00;
    base ^= BIT0MASK(message >> 4) & 0xffff0000;

    // bit 5 flips quarters 1 and 3; bit 6 flips quarters 2 and 3
    quarter[0] = base;
    quarter[1] = quarter[0] ^ BIT0MASK(message >> 5);
    quarter[3] = quarter[1] ^ BIT0MASK(message >> 6);
    quarter[2] = quarter[3] ^ BIT0MASK(message >> 5);

    for (size_t q = 0; q < 4; q++) {
        for (size_t b = 0; b < 4; b++) {
            word[4 * q + b] = (quarter[q] >> (8 * b)) & 0xff;
        }
    }
}



/**
 * @brief Add multiple codewords into expanded codeword
 *
 * Every bit of each of the MULTIPLICITY 128-bit codeword copies is expanded
 * to a 16-bit counter; the counters of all copies are summed.
 *
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is 64 too high
 *
 * The source is read two bytes at a time with memcpy (native byte order), so
 * no alignment or effective-type requirement is placed on the buffer behind src.
 *
 * @param[out] dst Structure that contain the expanded codeword (8 vectors of 16 counters)
 * @param[in] src Structure that contain the codeword (MULTIPLICITY copies of 16 bytes)
 */
static inline void expand_and_sum(__m256i *dst, const uint64_t *src) {
    const uint8_t *bytes = (const uint8_t *) src;
    uint16_t v[16];
    uint16_t chunk;

    for (size_t part = 0; part < 8; part++) {
        dst[part] = _mm256_setzero_si256();
    }
    for (size_t copy = 0; copy < MULTIPLICITY; copy++) {
        for (size_t part = 0; part < 8; part++) {
            // 16 codeword bits of this copy: bytes 2*part and 2*part+1
            memcpy(&chunk, bytes + 16 * copy + 2 * part, sizeof chunk);
            for (size_t bit = 0; bit < 16; bit++) {
                v[bit] = (chunk >> bit) & 1;
            }
            // explicit 16-bit lane addition instead of the compiler-specific
            // vector '+=' (which adds 64-bit lanes)
            dst[part] = _mm256_add_epi16(dst[part],
                                         _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
                                                          v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]));
        }
    }
}



/**
 * @brief Hadamard transform
 *
 * Perform hadamard transform of src and store result in dst
 * src is overwritten: it is also used as intermediate buffer
 * Method is best explained if we use H(3) instead of H(7):
 *
 * The routine multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * You can do this in three passes, where each pass does this:
 * set lower half of buffer to pairwise sums,
 * and upper half to differences
 * index  0        1        2        3        4        5        6        7
 * input: a,       b,       c,       d,       e,       f,       g,       h
 * pass 1: a+b,    c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * pass 3: a+b+c+d+e+f+g+h  a+b-c-d+e+f-g-h  a+b+c+d-e-f-g-h  a+b-c-d-e-f+g+h
 *         a-b+c-d+e-f+g-h  a-b-c+d+e-f-g+h  a-b+c-d-e+f-g+h  a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * Likewise, this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src Structure that contain the expanded codeword; clobbered
 * @param[out] dst Structure receiving the transform (written by the last pass)
 */
inline void hadamard(__m256i *src, __m256i *dst) {
    // even passes read src and write dst, odd passes go the other way:
    // src -> dst -> src -> dst -> src -> dst -> src -> dst
    __m256i *const buffers[2] = {src, dst};

    for (size_t pass = 0; pass < 7; pass++) {
        const __m256i *in = buffers[pass & 1];
        __m256i *out = buffers[(pass & 1) ^ 1];

        for (size_t part = 0; part < 4; part++) {
            const __m256i *pair = &in[2 * part];
            // warning: hadd/hsub work "within lanes" as Intel call it
            // so the middle 64 bit blocks of the result have to be swapped
            out[part] = _mm256_permute4x64_epi64(_mm256_hadd_epi16(pair[0], pair[1]), 0xd8);
            out[part + 4] = _mm256_permute4x64_epi64(_mm256_hsub_epi16(pair[0], pair[1]), 0xd8);
        }
    }
}



/**
* @brief Finding the location of the highest value
*
* This is the final step of the green machine: find the location of the highest value,
* and add 128 if the peak is positive
* Notes on decoding
* The standard "Green machine" decoder works as follows:
* if the received codeword is W, compute (2 * W - 1) * H7
* The entries of the resulting vector are always even and vary from
* -128 (= the complement is a code word, add bit 7 to decode)
* via 0 (this is a different codeword)
* to 128 (this is the code word).
*
* Our decoding differs in two ways:
* - We take W instead of 2 * W - 1 (so the entries are 0,1 instead of -1,1)
* - We take the sum of the repetitions (so the entries are 0..MULTIPLICITY)
* This implies that we have to subtract 64M (M=MULTIPLICITY)
* from the first entry to make sure the first codewords is handled properly
* and that the entries vary from -64M to 64M.
* -64M or 64M stands for a perfect codeword.
* If there are fewer than 32M errors, there is always a unique codeword
* with an entry with absolute value > 32M;
* this is because an error changes an entry by 1.
* The highest number that seem to be decodable is 50 errors, so that the
* highest entries in the hadamard transform can be as low as 12.
* But this is different for the repeated code.
* Because multiple codewords are added, this changes: the lowest value of the
* hadamard transform of the sum of six words is seen to be as low as 43 (!),
* which is way less than 12*6.
*
* It is possible that there are more errors, but the word is still uniquely
* decodable: we found a word with distance of 50 from the nearest codeword.
* That means that the highest entry can be as low as 14M.
* Since we have to do binary search, we search for the range 1-64M
* which can be done in 6+l2g(M) steps.
* The binary search is based on (values>32M are unique):
* M 32M min> max> firstStep #steps
* 2 64 1 64 33 +- 16 6
* 4 128 1 128 65 +- 32 7
* 6 192 1 192 129 +- 64 8
*
* As a check, we run a sample for M=6 to see the peak value; it ranged
* from 43 to 147, so my analysis looks right. Also, it shows that decoding
* far beyond the bound of 32M is needed.
*
* For the vectors, it would be tempting to use 8 bit ints,
* because the values "almost" fit in there.
* We could use some trickery to fit it in 8 bits, like saturated add or
* division by 2 in a late step.
* Unfortunately, these instructions do not exist.
* the adds _mm512_adds_epi8 is available only on the latest processors,
* and division, shift, mulhi are not available at all for 8 bits.
* So, we use 16 bit ints.
*
* For the search of the optimal comparison value,
* remember the transform contains 64M-d,
* where d are the distances to the codewords.
* The highest value gives the most likely codeword.
* There is no fast vectorized way to find this value, so we search for the
* maximum value itself.
* In each pass, we collect a bit map of the transform values that are,
* say >bound. There are three cases:
* bit map = 0: all code words are further away than 64M-bound (decrease bound)
* bit map has one bit: one unique code word has distance < 64M-bound
* bit map has multiple bits: multiple words (increase bound)
* We will search for the lowest value of bound that gives a nonzero bit map.
*
* @param[in] transform Structure that contain the expanded codeword (8 vectors of 16-bit entries)
* @return The decoded value: bits 4..6 are the index of the selected row of transform,
*         bits 0..3 the selected column within that row, and bit 7 is set when the
*         selected entry of transform is not negative
*/
inline uint32_t find_peaks(__m256i *transform) {
// a whole lot of vector variables
__m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows;
__m256i tmp = _mm256_setzero_si256();
__m256i vect_mask;
__m256i res;
int32_t lower;
int32_t width;
uint32_t message;
uint32_t mask;
int8_t index;
int8_t abs_value;
int8_t mask1;
int8_t mask2;
uint16_t result;

// compute absolute value of transform
for (size_t i = 0; i < 8; i++) {
abs_rows[i] = _mm256_abs_epi16(transform[i]);
}
// compute a vector of 16 elements which contains the maximum somewhere
// (later used to compute bits 0 through 3 of message)
max_abs_rows = abs_rows[0];
for (size_t i = 1; i < 8; i++) {
max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]);
}

// do binary search for the highest value that is lower than the maximum
// loop invariant: lower gives bit map = 0, lower + width gives bit map > 0
lower = 1;
// this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6
width = 1 << (5 + MULTIPLICITY / 2);
// if you don't unroll this loop, it fits in the loop cache
// uncomment the line below to speeding up the program by a few percent
// #pragma GCC unroll 0
while (width > 1) {
width >>= 1;
// compare with lower + width; put result in bitmap
// make vector from value of new bound
bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width));
bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound);
// step up if there are any matches
// branch-free select: testz returns 1 when bitmap is all zero, which the
// next line turns into mask = 0 (no match) or mask = 0xffffffff (match)
mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
lower += mask & width;
}
// lower+width contains the maximum value of the vector
// or less, if the maximum is very high (which is OK)
// normally, there is one maximum, but sometimes there are more
// find where the maxima occur in the maximum vector
// (each determines lower 4 bits of peak position)
// construct vector filled with bound-1
bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1));

// find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message
// rows are scanned from 7 down to 0 and every match overwrites message,
// so the lowest matching row wins; message starts out as row 7
message = 0x70;
for (size_t i = 0; i < 8; i++) {
bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound);
mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
message ^= mask & (message ^ ((7 - i) << 4));
}
// we decided which row of the matrix contains the lowest match
// select proper row
index = message >> 4;

// copy abs_rows[index] into tmp without indexing by the (data dependent) row:
// every row is ANDed with a mask that is all ones only when i == index
tmp = _mm256_setzero_si256();
for (size_t i = 0; i < 8; i++) {
// abs_value = |index - i|
abs_value = (int8_t)(index - i);
mask1 = abs_value >> 7;
abs_value ^= mask1;
abs_value -= mask1;
// mask2 = 1 if abs_value != 0 and 0 otherwise, so mask is all ones only for i == index
mask2 = ((uint8_t) - abs_value >> 7);
mask = (-1ULL) + mask2;
vect_mask = _mm256_set1_epi32(mask);
res = _mm256_and_si256(abs_rows[i], vect_mask);
tmp = _mm256_or_si256(tmp, res);
}

active_row = tmp;

// get the column number of the vector element
// by setting the bits corresponding to the columns
// and then adding elements within two groups of 8
vect_mask = _mm256_cmpgt_epi16(active_row, bound);
vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1);
for (size_t i = 0; i < 3; i++) {
vect_mask = _mm256_hadd_epi16(vect_mask, vect_mask);
}
// add low 4 bits of message
message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8));

// set bit 7 if sign of biggest value is positive
// make sure a jump isn't generated by the compiler
// first pick row message / 16 of transform, again by masking every row
tmp = _mm256_setzero_si256();
for (size_t i = 0; i < 8; i++) {
mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63);
vect_mask = _mm256_set1_epi32(mask);
tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i]));
}
// then pick 16-bit entry message % 16 of that row the same way
result = 0;
for (size_t i = 0; i < 16; i++) {
mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63);
result |= mask & ((uint16_t *)&tmp)[i];
}
// bit 15 of result is the sign of the selected entry: clear sign -> set bit 7
message |= (0x8000 & ~result) >> 8;
return message;
}



/**
 * @brief Encodes the received word
 *
 * The message consists of N1 bytes each byte is encoded into PARAM_N2 bits,
 * or MULTIPLICITY repeats of 128 bits
 *
 * @param[out] cdw Byte array receiving the encoded message
 *                 (16 * MULTIPLICITY bytes per message byte)
 * @param[in] msg Byte array of size VEC_N1_SIZE_BYTES storing the message
 */
void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    // start of the group of MULTIPLICITY codewords belonging to the current byte
    uint8_t *group = cdw;

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++, group += 16 * MULTIPLICITY) {
        // the first codeword of the group is computed ...
        encode(group, msg[i]);
        // ... and the remaining ones are plain copies of it
        for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
            memcpy(group + 16 * copy, group, 16);
        }
    }
}



/**
 * @brief Decodes the received word
 *
 * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
 * The theory of error-correcting codes codes @cite macwilliams1977theory
 *
 * For every message byte the MULTIPLICITY received codeword copies are summed,
 * transformed, corrected in their first entry and passed to the peak search.
 *
 * @param[out] msg Byte array of size VEC_N1_SIZE_BYTES receiving the decoded message
 * @param[in] cdw Byte array storing the received word
 *                (16 * MULTIPLICITY bytes per message byte)
 */
void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    __m256i expanded[8];
    __m256i transform[8];
    // offset carried by the very first 16-bit entry of the transform only
    const __m256i first_entry_bias = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0,
                                                      0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY);

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        // collect the codewords
        expand_and_sum(expanded, (const uint64_t *)&cdw[16 * i * MULTIPLICITY]);
        // apply hadamard transform
        hadamard(expanded, transform);
        // fix the first entry to get the half Hadamard transform.
        // This must be a 16-bit lane subtraction: the generic vector '-=' on
        // __m256i subtracts 64-bit lanes, so whenever the first entry is
        // smaller than 64 * MULTIPLICITY the borrow would leak into entries 1..3.
        transform[0] = _mm256_sub_epi16(transform[0], first_entry_bias);
        // finish the decoding
        msg[i] = (uint8_t) find_peaks(transform);
    }
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-128/avx2/reed_muller.h Целия файл

@@ -0,0 +1,18 @@
#ifndef REED_MULLER_H
#define REED_MULLER_H


/**
* @file reed_muller.h
* Header file of reed_muller.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

/**
* @brief Encode VEC_N1_SIZE_BYTES message bytes with the Reed-Muller code
*
* Every message byte becomes a 16-byte codeword that is repeated
* CEIL_DIVIDE(PARAM_N2, 128) times in cdw.
*
* @param[out] cdw Byte array receiving the encoded message
* @param[in] msg Byte array of size VEC_N1_SIZE_BYTES storing the message
*/
void PQCLEAN_HQCRMRS128_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

/**
* @brief Decode a received word back into VEC_N1_SIZE_BYTES message bytes
*
* @param[out] msg Byte array of size VEC_N1_SIZE_BYTES receiving the decoded message
* @param[in] cdw Byte array storing the received word, laid out as produced by the encoder
*/
void PQCLEAN_HQCRMRS128_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


#endif

+ 466
- 0
src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.c Целия файл

@@ -0,0 +1,466 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include "parsing.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
* @file reed_solomon.c
* Constant time implementation of Reed-Solomon codes
*/


// Forward declarations of the file-local helpers.
// compute_syndromes: fills syndromes from the received codeword cdw
static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
// compute_elp: builds the error locator polynomial sigma and returns its degree
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
// compute_roots: derives the error vector from sigma via the FFT routines
static void compute_roots(uint8_t *error, uint16_t *sigma);
// compute_z_poly: computes the polynomial z(x) from sigma and the syndromes
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

// Multiplier table for syndromes 0..15 (see compute_syndromes).
// Row i is multiplied with codeword symbol cdw[i + 1]; each row holds sixteen
// 16-bit lanes, four per 64-bit literal, lowest lane in the low 16 bits.
// NOTE(review): the first rows are consistent with lane j of row i being
// alpha^((i + 1) * (j + 1)) in GF(2^8) reduced by 0x11D (row 0 reads
// 0x02, 0x04, 0x08, ..., 0x80, 0x1d, ...) -- confirm against the generator.
static const __m256i alpha_ij256_1[45] = {
{0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087},
{0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006},
{0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035},
{0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014},
{0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be},
{0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078},
{0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3},
{0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d},
{0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed},
{0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e},
{0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054},
{0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4},
{0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5},
{0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062},
{0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064},
{0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051},
{0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045},
{0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb},
{0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083},
{0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020},
{0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d},
{0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0},
{0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee},
{0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba},
{0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e},
{0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb},
{0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9},
{0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd},
{0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec},
{0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9},
{0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052},
{0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1},
{0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1},
{0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc},
{0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c},
{0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2},
{0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048},
{0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016},
{0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad},
{0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074},
{0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9},
{0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025},
{0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 0x00ab00a9005b008c},
{0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de},
{0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f}
};
// Multiplier table for syndromes 16..29 (see compute_syndromes).
// Same layout as alpha_ij256_1: row i is multiplied with cdw[i + 1], four
// 16-bit lanes per 64-bit literal, lowest lane in the low 16 bits.
// Only the low 14 lanes of a row are populated; lanes 14 and 15 are zero padding.
// NOTE(review): the first rows are consistent with lane j of row i being
// alpha^((i + 1) * (j + 17)) in GF(2^8) reduced by 0x11D -- confirm against the generator.
static const __m256i alpha_ij256_2[45] = {
{0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x0000000000600030},
{0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x0000000000b90069},
{0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x0000000000df007f},
{0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00000000003b00f8},
{0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x000000000055004d},
{0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x00000000009600f1},
{0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x00000000005900e0},
{0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x00000000002c00f7},
{0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0000000000260040},
{0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x0000000000c1009c},
{0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00000000000f005f},
{0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x00000000001a00b6},
{0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x0000000000a900ec},
{0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x00000000009100aa},
{0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0000000000640096},
{0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00000000002400a2},
{0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x000000000001000b},
{0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00000000006000cd},
{0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x0000000000b900d4},
{0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x0000000000df005e},
{0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00000000003b0086},
{0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0000000000550085},
{0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x00000000009600d5},
{0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000000000059006e},
{0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x00000000002c00ef},
{0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00000000002600fa},
{0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0000000000c1002d},
{0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x00000000000f0023},
{0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x00000000001a001e},
{0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0000000000a9001a},
{0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x00000000009100da},
{0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0000000000640063},
{0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0000000000240082},
{0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0000000000010045},
{0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x000000000060006c},
{0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x0000000000b9008f},
{0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x0000000000df0028},
{0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00000000003b00d3},
{0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x00000000005500ce},
{0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x0000000000960084},
{0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x00000000005900e5},
{0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x00000000002c0007},
{0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0000000000260090},
{0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x0000000000c10002},
{0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x00000000000f0060}
};


/**
 * @brief Encodes a message of PARAM_K bytes to a Reed-Solomon codeword of PARAM_N1 bytes
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
 * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code.
 *
 * The first PARAM_N1 - PARAM_K bytes of cdw serve as the shift register while
 * encoding and end up holding the parity symbols; the message is copied
 * unchanged behind them. The register is cleared here, so cdw does not have to
 * be initialised by the caller.
 *
 * @param[out] cdw Byte array of size PARAM_N1 receiving the encoded message
 * @param[in] msg Byte array of size PARAM_K storing the message
 */
void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    size_t i, k;
    uint8_t gate_value = 0;
    uint8_t prev, x;

    // the unions force 32-byte alignment so the arrays can be used as vectors
    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } tmp = {0};

    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } PARAM_RS_POLY = {{ RS_POLY_COEFS }};

    __m256i *tmp256 = (__m256i *)tmp.arr16;
    __m256i *param256 = (__m256i *)PARAM_RS_POLY.arr16;

    // the register is read (gate_value, prev) before it is first written,
    // so it has to start from all zeros
    memset(cdw, 0, PARAM_N1 - PARAM_K);

    for (i = 0; i < PARAM_K; ++i) {
        // feedback symbol: next message byte (highest index first) plus the register output
        gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]);

        // tmp = gate_value * generator polynomial, 16 coefficients per vector
        const __m256i gate256 = _mm256_set1_epi16(gate_value);
        for (size_t chunk = 0; chunk < CEIL_DIVIDE(PARAM_G, 16); ++chunk) {
            _mm256_storeu_si256(&tmp256[chunk], PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(gate256, param256[chunk]));
        }

        // shift the register by one position while adding the feedback
        prev = 0;
        for (k = 0; k < PARAM_N1 - PARAM_K; k++) {
            x = cdw[k];
            cdw[k] = (uint8_t) (prev ^ tmp.arr16[k]);
            prev = x;
        }
    }

    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
}



/**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Syndromes 0..15 are accumulated with alpha_ij256_1 and syndromes 16..29 with
 * alpha_ij256_2; exactly 30 entries of syndromes are written.
 * The results are stored with an unaligned vector store and memcpy, so the
 * output array needs no particular alignment and is never written through a
 * pointer of another integer type.
 *
 * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    __m256i low_syndromes256 = _mm256_set1_epi16(cdw[0]);
    __m256i last_syndromes256 = _mm256_set1_epi16(cdw[0]);

    for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
        low_syndromes256 = _mm256_xor_si256(low_syndromes256,
                                            PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_1[i]));
    }

    for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
        last_syndromes256 = _mm256_xor_si256(last_syndromes256,
                                             PQCLEAN_HQCRMRS128_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_2[i]));
    }

    // syndromes 0..15
    _mm256_storeu_si256((__m256i *) syndromes, low_syndromes256);

    // syndromes 16..29: lanes 14 and 15 of alpha_ij256_2 are zero padding,
    // so only the low 14 lanes are results and only they are written out
    memcpy(syndromes + 16, &last_syndromes256, 14 * sizeof(uint16_t));
}



/**
* @brief Computes the error locator polynomial (ELP) sigma
*
* This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes). <br>
* We use the letter p for rho which is initialized at -1. <br>
* The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
* Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
* sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
* We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
* This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
* and we only need to save its first PARAM_DELTA coefficients
* (X_sigma_p[i] is refreshed from sigma_copy[i - 1] for i <= PARAM_DELTA).
*
* All decisions are taken with 0x0000 / 0xffff masks instead of branches.
*
* @returns the degree of the ELP sigma
* @param[in,out] sigma Array of at least PARAM_DELTA + 1 elements receiving the ELP.
*                      Coefficients 1..PARAM_DELTA are updated by XOR, so they must be
*                      zero on entry; sigma[0] is set to 1 here.
* @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
*/
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
uint16_t deg_sigma = 0;
uint16_t deg_sigma_p = 0;
uint16_t deg_sigma_copy = 0;
uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
uint16_t pp = (uint16_t) -1; // 2*rho
uint16_t d_p = 1;
uint16_t d = syndromes[0];

uint16_t mask1, mask2, mask12;
uint16_t deg_X, deg_X_sigma_p;
uint16_t dd;
uint16_t mu;

uint16_t i;

sigma[0] = 1;
for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
// Save sigma in case we need it to update X_sigma_p
// (2 * PARAM_DELTA bytes, i.e. the first PARAM_DELTA coefficients)
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
deg_sigma_copy = deg_sigma;

// dd = d / d_p
dd = PQCLEAN_HQCRMRS128_AVX2_gf_mul(d, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(d_p));

// sigma += dd * X_sigma_p (coefficient 0 of X_sigma_p is always 0)
for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
sigma[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(dd, X_sigma_p[i]);
}

deg_X = mu - pp;
deg_X_sigma_p = deg_X + deg_sigma_p;

// mask1 = 0xffff if(d != 0) and 0 otherwise
mask1 = -((uint16_t) - d >> 15);

// mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

// mask12 = 0xffff if the deg_sigma increased and 0 otherwise
mask12 = mask1 & mask2;
deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

// last iteration: sigma is final, the state for a next round is not needed
if (mu == (2 * PARAM_DELTA - 1)) {
break;
}

// if the degree increased, the saved sigma becomes the new sigma_p:
// pp = mu, d_p = d, X_sigma_p = X * sigma_copy; otherwise X_sigma_p is only shifted
pp ^= mask12 & (mu ^ pp);
d_p ^= mask12 & (d ^ d_p);
for (i = PARAM_DELTA; i; --i) {
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
}

deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);

// discrepancy for the next round, computed from the updated sigma
d = syndromes[mu + 1];

for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
d ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
}
}

return deg_sigma;
}



/**
 * @brief Computes the error polynomial error from the error locator polynomial sigma
 *
 * The evaluations of sigma produced by PQCLEAN_HQCRMRS128_AVX2_fft are stored in a local buffer w
 * and handed to PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly, which fills error.
 * See function PQCLEAN_HQCRMRS128_AVX2_fft for more details.
 *
 * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 */
static void compute_roots(uint8_t *error, uint16_t *sigma) {
// Scratch buffer of 2^PARAM_M entries for the FFT output
uint16_t w[1 << PARAM_M] = {0};

// PARAM_DELTA + 1 is passed as the number of coefficients of sigma
PQCLEAN_HQCRMRS128_AVX2_fft(w, sigma, PARAM_DELTA + 1);
PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(error, w);
}



/**
 * @brief Computes the polynomial z(x)
 *
 * z[0] is 1. For 1 <= i <= PARAM_DELTA the coefficient z[i] is
 * sigma[i] + syndromes[i - 1] + sum_{j = 1}^{i - 1} sigma[j] * syndromes[i - j - 1]
 * when i <= degree, and 0 otherwise. The selection on i <= degree is done with a mask
 * rather than a branch.
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array storing the error locator polynomial (indices 0..PARAM_DELTA are read)
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    size_t idx;
    size_t t;
    uint16_t keep;

    z[0] = 1;

    // Coefficients above the degree of sigma are forced to zero
    idx = 1;
    while (idx <= PARAM_DELTA) {
        // keep = 0xffff if (idx <= degree) and 0 otherwise
        keep = -((uint16_t) (idx - degree - 1) >> 15);
        z[idx] = keep & sigma[idx];
        ++idx;
    }

    z[1] ^= syndromes[0];

    for (idx = 2; idx < PARAM_DELTA + 1; ++idx) {
        uint16_t acc = syndromes[idx - 1];

        for (t = 1; t < idx; ++t) {
            acc ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[t], syndromes[idx - t - 1]);
        }

        // Masking the accumulated sum once equals masking every term (AND distributes over XOR)
        keep = -((uint16_t) (idx - degree - 1) >> 15);
        z[idx] ^= keep & acc;
    }
}



/**
 * @brief Computes the error values
 *
 * Works in three passes, all of them free of data-dependent branches:
 * <ol>
 * <li> collect gf_exp[i] for every position i with error[i] != 0 into beta_j (in order of appearance),
 * <li> compute one value e_j per collected position from z and the beta_j,
 * <li> scatter the e_j back to the positions where error[i] != 0.
 * </ol>
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 *
 * @param[out] error_values Array of PARAM_N1 elements receiving the error values; values are added to the existing content, so it must be zero-initialized by the caller
 * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
 * @param[in] error Array whose first PARAM_N1 entries are nonzero exactly at the error positions
 */
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
uint16_t beta_j[PARAM_DELTA] = {0};
uint16_t e_j[PARAM_DELTA] = {0};

uint16_t delta_counter;
uint16_t delta_real_value;
uint16_t found;
uint16_t mask1;
uint16_t mask2;
uint16_t tmp1;
uint16_t tmp2;
uint16_t inverse;
uint16_t inverse_power_j;

// Compute the beta_{j_i} page 31 of the documentation
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; i++) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
// Every slot j is visited; only the slot j == delta_counter receives gf_exp[i]
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
beta_j[j] += mask1 & mask2 & gf_exp[i];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
// Number of error positions that were stored in beta_j
delta_real_value = delta_counter;

// Compute the e_{j_i} page 31 of the documentation
for (size_t i = 0; i < PARAM_DELTA; ++i) {
tmp1 = 1;
tmp2 = 1;
inverse = PQCLEAN_HQCRMRS128_AVX2_gf_inverse(beta_j[i]);
inverse_power_j = 1;

// tmp1 = 1 + sum over j of z[j] * inverse^j
for (size_t j = 1; j <= PARAM_DELTA; ++j) {
inverse_power_j = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, inverse);
tmp1 ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, z[j]);
}
// tmp2 = product over the other slots of (1 + inverse * beta_j[other])
for (size_t k = 1; k < PARAM_DELTA; ++k) {
tmp2 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
}
// Slots beyond the number of errors found are masked to zero
mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
e_j[i] = mask1 & PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(tmp2));
}

// Place the delta e_{j_i} values at the right coordinates of the output vector
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; ++i) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
error_values[i] += mask1 & mask2 & e_j[j];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
}



/**
 * @brief Correct the errors
 *
 * XORs each of the first PARAM_N1 error values into the corresponding byte of cdw.
 * The 16-bit error value is truncated to 8 bits by the store into cdw.
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received word; corrected in place
 * @param[in] error_values Array of PARAM_N1 elements storing the error values
 */
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    size_t pos = PARAM_N1;

    // Positions are independent, so the traversal order does not matter
    while (pos-- > 0) {
        cdw[pos] ^= error_values[pos];
    }
}



/**
 * @brief Decodes the received word
 *
 * This function relies on six steps:
 * <ol>
 * <li> The first step, is the computation of the 2*PARAM_DELTA syndromes.
 * <li> The second step is the computation of the error-locator polynomial sigma.
 * <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
 * <li> The fourth step, is the polynomial z(x).
 * <li> The fifth step, is the computation of the error values.
 * <li> The sixth step is the correction of the errors in the received polynomial.
 * </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Byte array receiving the PARAM_K bytes of the decoded message
 * @param[in,out] cdw Byte array storing the received word (PARAM_N1 bytes are processed); it is corrected in place
 */
void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
// All work buffers are zero-initialized; several helpers accumulate into them with ^= or +=
uint16_t syndromes[2 * PARAM_DELTA] = {0};
uint16_t sigma[1 << PARAM_FFT] = {0};
uint8_t error[1 << PARAM_M] = {0};
uint16_t z[PARAM_N1] = {0};
uint16_t error_values[PARAM_N1] = {0};
uint16_t deg;

// Calculate the 2*PARAM_DELTA syndromes
compute_syndromes(syndromes, cdw);

// Compute the error locator polynomial sigma
// Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
deg = compute_elp(sigma, syndromes);

// Compute the error polynomial error
compute_roots(error, sigma);

// Compute the polynomial z(x)
compute_z_poly(z, sigma, deg, syndromes);

// Compute the error values
compute_error_values(error_values, z, error);

// Correct the errors
correct_errors(cdw, error_values);

// Retrieve the message from the decoded codeword
// (PARAM_K bytes starting at offset PARAM_G - 1 of the corrected word)
memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);

}

+ 20
- 0
src/kem/hqc/hqc-rmrs-128/avx2/reed_solomon.h
Файловите разлики са ограничени, защото са твърде много
Целия файл


+ 178
- 0
src/kem/hqc/hqc-rmrs-128/avx2/vector.c Целия файл

@@ -0,0 +1,178 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
/**
* @file vector.c
* @brief Implementation of vectors sampling and some utilities for the HQC scheme
*/



/**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function samples uniformly at random <b>weight</b> distinct positions in [0, PARAM_N - 1] and
 * flips the corresponding bits of <b>v</b>.
 * Suppose PARAM_N is equal to \f$ 70853 \f$; to select a position \f$ r\f$ the function works as follows:
 * 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$
 * 3. If \f$ x \geq t\f$, go to 1
 * 4. It returns \f$ r = x \mod 70853\f$
 *
 * The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 * A position equal to an already selected one is discarded and sampled again.
 *
 * The selected bits are XORed into the existing content of v, so v must be zeroed by the caller for the
 * result to have exactly the requested weight. v is read and written in CEIL_DIVIDE(PARAM_N, 256) blocks
 * of 256 bits and must be large enough for that.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[in,out] v Pointer to the array receiving the selected bits
 * @param[in] weight Integer that is the Hamming weight; the local buffers hold at most PARAM_OMEGA_R positions
 */
void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    size_t random_bytes_size = 3 * weight;
    uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0};
    uint32_t tmp[PARAM_OMEGA_R] = {0};
    __m256i bit256[PARAM_OMEGA_R];
    __m256i bloc256[PARAM_OMEGA_R];
    __m256i posCmp256 = _mm256_set_epi64x(3, 2, 1, 0);
    __m256i pos256;
    __m256i mask256;
    __m256i aux;
    __m256i i256;
    uint64_t bloc, pos, bit64;
    uint8_t inc;
    size_t i, j, k;

    i = 0;
    // Forces a refill of rand_bytes on the first pass
    j = random_bytes_size;
    while (i < weight) {
        // Rejection sampling of a 24-bit big-endian value below UTILS_REJECTION_THRESHOLD
        do {
            if (j == random_bytes_size) {
                seedexpander(ctx, rand_bytes, random_bytes_size);
                j = 0;
            }

            tmp[i] = ((uint32_t) rand_bytes[j++]) << 16;
            tmp[i] |= ((uint32_t) rand_bytes[j++]) << 8;
            tmp[i] |= rand_bytes[j++];

        } while (tmp[i] >= UTILS_REJECTION_THRESHOLD);

        tmp[i] = tmp[i] % PARAM_N;

        // Keep the candidate only if it differs from every position accepted so far
        inc = 1;
        for (k = 0; k < i; k++) {
            if (tmp[k] == tmp[i]) {
                inc = 0;
            }
        }
        i += inc;
    }

    for (i = 0; i < weight; i++) {
        // we store the bloc number and bit position of each vb[i]
        bloc = tmp[i] >> 6;
        bloc256[i] = _mm256_set1_epi64x(bloc >> 2);
        pos = (bloc & 0x3UL);
        pos256 = _mm256_set1_epi64x(pos);
        // Selects the 64-bit lane (0..3) of the 256-bit block that holds the bit
        mask256 = _mm256_cmpeq_epi64(pos256, posCmp256);
        bit64 = 1ULL << (tmp[i] & 0x3f);
        // Intrinsic instead of the '&' operator: operators on __m256i are a compiler extension
        bit256[i] = _mm256_and_si256(_mm256_set1_epi64x(bit64), mask256);
    }

    // Every (block, position) pair is visited, so the memory access pattern does not depend on the positions
    for (i = 0; i < CEIL_DIVIDE(PARAM_N, 256); i++) {
        aux = _mm256_loadu_si256(((__m256i *)v) + i);
        i256 = _mm256_set1_epi64x(i);

        for (j = 0; j < weight; j++) {
            mask256 = _mm256_cmpeq_epi64(bloc256[j], i256);
            aux = _mm256_xor_si256(aux, _mm256_and_si256(bit256[j], mask256));
        }
        _mm256_storeu_si256(((__m256i *)v) + i, aux);
    }

}



/**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * This function generates a random binary vector of dimension <b>PARAM_N</b>. It generates a random
 * array of bytes using the seedexpander function, and drops the extra bits using a mask.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 64-bit words receiving the vector
 */
void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
uint8_t rand_bytes[VEC_N_SIZE_BYTES] = {0};

seedexpander(ctx, rand_bytes, VEC_N_SIZE_BYTES);

// Pack the random bytes into 64-bit words, then clear the unused bits of the last word with RED_MASK
PQCLEAN_HQCRMRS128_AVX2_load8_arr(v, VEC_N_SIZE_64, rand_bytes, VEC_N_SIZE_BYTES);
v[VEC_N_SIZE_64 - 1] &= RED_MASK;
}



/**
 * @brief Adds two vectors
 *
 * Addition is over GF(2), i.e. a word-wise XOR. Each output word depends only on the
 * input words at the same index.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in 64-bit words
 */
void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t remaining = size;
    uint32_t word = 0;

    while (remaining != 0) {
        const uint64_t lhs = v1[word];
        const uint64_t rhs = v2[word];

        o[word] = lhs ^ rhs;
        ++word;
        --remaining;
    }
}



/**
 * @brief Compares two vectors
 *
 * All <b>size</b> bytes are always inspected; there is no early exit on the first difference.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the size of the vectors, in bytes
 * @returns 0 if the vectors are equal and 1 otherwise
 */
uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    uint64_t diff = 0;
    size_t left = size;

    // Accumulate the OR of all byte differences
    while (left > 0) {
        --left;
        diff |= (uint64_t) (v1[left] ^ v2[left]);
    }

    // diff is at most 0xff, so its two's complement negation has bit 63 set exactly when diff != 0
    diff = ((uint64_t) 0 - diff) >> 63;
    return (uint8_t) diff;
}



/**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When size_o >= size_v, the CEIL_DIVIDE(size_v, 8) bytes of v are copied to o and nothing else is written.
 * When size_o < size_v, exactly VEC_N1N2_SIZE_BYTES bytes are copied (independently of size_o) and the
 * bits above size_o % 64 in word VEC_N1N2_SIZE_64 - 1 are cleared.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    // Growing (or same size): plain byte copy of the input
    if (size_o >= size_v) {
        memcpy(o, v, CEIL_DIVIDE(size_v, 8));
        return;
    }

    // Shrinking: number of unused high bits in the last output word (0..63)
    const uint32_t used_bits = size_o % 64;
    const uint32_t spare_bits = used_bits ? 64 - used_bits : 0;

    memcpy(o, v, VEC_N1N2_SIZE_BYTES);

    // One mask with the top spare_bits cleared; with spare_bits == 0 the word is left unchanged
    o[VEC_N1N2_SIZE_64 - 1] &= UINT64_C(0xFFFFFFFFFFFFFFFF) >> spare_bits;
}

+ 27
- 0
src/kem/hqc/hqc-rmrs-128/avx2/vector.h Целия файл

@@ -0,0 +1,27 @@
#ifndef VECTOR_H
#define VECTOR_H


/**
 * @file vector.h
 * @brief Header file for vector.c: sampling of binary vectors and small vector utilities
 */
#include "nistseedexpander.h"
#include "randombytes.h"
#include <stdint.h>

// XORs `weight` distinct random bit positions (drawn from ctx) into v
void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

// Fills v with random bits drawn from ctx and masks the last word with RED_MASK
void PQCLEAN_HQCRMRS128_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

// NOTE(review): no definition of this function appears in the accompanying vector.c - confirm it exists elsewhere
void PQCLEAN_HQCRMRS128_AVX2_vect_set_random_from_randombytes(uint64_t *v);


// o[i] = v1[i] ^ v2[i] for i < size
void PQCLEAN_HQCRMRS128_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

// Returns 0 when the first `size` bytes of v1 and v2 are equal, 1 otherwise
uint8_t PQCLEAN_HQCRMRS128_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

// Copies v into o, truncating to size_o bits when size_o < size_v (sizes are in bits)
void PQCLEAN_HQCRMRS128_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


#endif

+ 16
- 0
src/kem/hqc/hqc-rmrs-128/clean/CMakeLists.txt Целия файл

@@ -0,0 +1,16 @@
# Source files of the HQC-RMRS-128 implementation located in this (clean/) directory.
set(
SRC_CLEAN_HQCRMRS128
code.c
fft.c
gf2x.c
gf.c
hqc.c
kem.c
parsing.c
reed_muller.c
reed_solomon.c
vector.c
)

# define_kem_alg is a project helper defined elsewhere; it receives a target name, the
# symbol prefix, the source list and this directory.
# NOTE(review): presumably it creates the library target for this KEM - confirm against its definition.
define_kem_alg(hqcrmrs128_clean
PQCLEAN_HQCRMRS128_CLEAN "${SRC_CLEAN_HQCRMRS128}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 25
- 0
src/kem/hqc/hqc-rmrs-128/clean/api.h Целия файл

@@ -0,0 +1,25 @@
#ifndef PQCLEAN_HQCRMRS128_CLEAN_API_H
#define PQCLEAN_HQCRMRS128_CLEAN_API_H
/**
 * @file api.h
 * @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
 */

#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_ALGNAME "HQC-RMRS-128"

// Sizes, in bytes, of the secret key, public key, shared secret and ciphertext
#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES 2289
#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES 2249
#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES 64
#define PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES 4481

// As a technicality, the public key is appended to the secret key in order to respect the NIST API.
// Without this constraint, PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES would be defined as 32
// NOTE(review): 2289 - 2249 = 40, not 32 - confirm the intended length of the secret part.

// Generates a key pair into pk and sk (buffers of PUBLICKEYBYTES and SECRETKEYBYTES bytes)
int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

// Encapsulation: writes the ciphertext ct and the shared secret ss for public key pk
int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

// Decapsulation: writes the shared secret ss recovered from ct with secret key sk
int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


#endif

+ 46
- 0
src/kem/hqc/hqc-rmrs-128/clean/code.c Целия файл

@@ -0,0 +1,46 @@
#include "code.h"
#include "parameters.h"
#include "reed_muller.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <string.h>
/**
* @file code.c
* @brief Implementation of concatenated code
*/



/**
 *
 * @brief Encoding the message m to a code word em using the concatenated code
 *
 * First we encode the message using the Reed-Solomon code, then with the duplicated Reed-Muller code we obtain
 * a concatenated code word.
 *
 * @param[out] em Pointer to an array that is the tensor code word
 * @param[in] m Pointer to an array that is the message
 */
void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *m) {
// Intermediate Reed-Solomon code word (VEC_N1_SIZE_BYTES bytes)
uint8_t tmp[VEC_N1_SIZE_BYTES] = {0};

// Outer code first, then the inner code on its output
PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(tmp, m);
PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(em, tmp);

}



/**
 * @brief Decoding the code word em to a message m using the concatenated code
 *
 * The two codes are undone in the reverse order of the encoding: Reed-Muller first, then Reed-Solomon.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em) {
// Intermediate Reed-Solomon word recovered by the Reed-Muller decoder (VEC_N1_SIZE_BYTES bytes)
uint8_t tmp[VEC_N1_SIZE_BYTES] = {0};

PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(tmp, em);
PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(m, tmp);

}

+ 18
- 0
src/kem/hqc/hqc-rmrs-128/clean/code.h Целия файл

@@ -0,0 +1,18 @@
#ifndef CODE_H
#define CODE_H


/**
 * @file code.h
 * Header file of code.c (encoding and decoding with the concatenated code)
 */
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

// Encodes message into the concatenated code word em (Reed-Solomon, then Reed-Muller)
void PQCLEAN_HQCRMRS128_CLEAN_code_encode(uint8_t *em, const uint8_t *message);

// Decodes the code word em into the message m (Reed-Muller, then Reed-Solomon)
void PQCLEAN_HQCRMRS128_CLEAN_code_decode(uint8_t *m, const uint8_t *em);


#endif

+ 351
- 0
src/kem/hqc/hqc-rmrs-128/clean/fft.c Целия файл

@@ -0,0 +1,351 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
#include <string.h>
/**
* @file fft.c
* Implementation of the additive FFT and its transpose.
* This implementation is based on the paper from Gao and Mateer: <br>
* Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
* IEEE Transactions on Information Theory 56 (2010), 6265--6272.
* http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
* and includes improvements proposed by Bernstein, Chou and Schwabe here:
* https://binary.cr.yp.to/mcbits-20130616.pdf
*/


// Forward declarations of the file-local helpers; radix and radix_big call each other,
// and fft_rec is recursive.
static void compute_fft_betas(uint16_t *betas);
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


/**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The basis is written in decreasing order: 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
static void compute_fft_betas(uint16_t *betas) {
    size_t shift = PARAM_M - 1;

    // One entry per exponent from PARAM_M - 1 down to 1; exponent 0 (the element 1) is omitted
    for (; shift > 0; --shift) {
        *betas++ = 1 << shift;
    }
}



/**
 * @brief Computes the subset sums of the given set
 *
 * The array subset_sums is such that its ith element is
 * the subset sum (XOR) of the set elements given by the binary form of i.
 * The table is built by doubling: once the first 2^e entries are known, the next 2^e
 * entries are the same sums with set[e] added.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    size_t filled = 1;
    uint16_t elem;

    subset_sums[0] = 0;

    for (elem = 0; elem < set_size; ++elem) {
        uint16_t *upper = subset_sums + filled;
        size_t lower;

        for (lower = 0; lower < filled; ++lower) {
            upper[lower] = set[elem] ^ subset_sums[lower];
        }
        filled <<= 1;
    }
}



/**
 * @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
 *
 * Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
 * as proposed by Bernstein, Chou and Schwabe:
 * https://binary.cr.yp.to/mcbits-20130616.pdf
 *
 * The cases m_f = 1..4 are fully unrolled; any other value is delegated to radix_big.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of size a power of 2
 * @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
 */
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
switch (m_f) {
// 16 input coefficients -> 8 + 8 output coefficients.
// The statement order matters: several lines reuse outputs computed just above them.
case 4:
f0[4] = f[8] ^ f[12];
f0[6] = f[12] ^ f[14];
f0[7] = f[14] ^ f[15];
f1[5] = f[11] ^ f[13];
f1[6] = f[13] ^ f[14];
f1[7] = f[15];
f0[5] = f[10] ^ f[12] ^ f1[5];
f1[4] = f[9] ^ f[13] ^ f0[5];

f0[0] = f[0];
f1[3] = f[7] ^ f[11] ^ f[15];
f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
f1[2] = f[3] ^ f1[1] ^ f0[3];
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

// 8 input coefficients -> 4 + 4 output coefficients
case 3:
f0[0] = f[0];
f0[2] = f[4] ^ f[6];
f0[3] = f[6] ^ f[7];
f1[1] = f[3] ^ f[5] ^ f[7];
f1[2] = f[5] ^ f[6];
f1[3] = f[7];
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

// 4 input coefficients -> 2 + 2 output coefficients
case 2:
f0[0] = f[0];
f0[1] = f[2] ^ f[3];
f1[0] = f[1] ^ f0[1];
f1[1] = f[3];
break;

// 2 input coefficients: f = f0 + x.f1 directly
case 1:
f0[0] = f[0];
f1[0] = f[1];
break;

// Larger sizes are split and handled recursively
default:
radix_big(f0, f1, f, m_f);
break;
}
}

/**
 * @brief Radix conversion for the sizes that radix does not unroll
 *
 * With n = 2^(m_f - 2), the 4n coefficients of f are combined into two polynomials Q and R of
 * 2n coefficients each, both are converted with radix at level m_f - 1, and the results are
 * interleaved: the low halves of f0 and f1 come from R, the high halves from Q.
 * The local buffers are sized from PARAM_FFT, so m_f must not exceed PARAM_FFT.
 *
 * @param[out] f0 Array of 2^(m_f - 1) elements
 * @param[out] f1 Array of 2^(m_f - 1) elements
 * @param[in] f Array of 2^m_f elements
 * @param[in] m_f Base-2 logarithm of the size of f
 */
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R1[1 << (PARAM_FFT - 2)] = {0};

    // A quarter of the input, in elements and in bytes
    const size_t quarter = (size_t) 1 << (m_f - 2);
    const size_t quarter_bytes = quarter * sizeof(uint16_t);
    size_t t;

    // Q = (top quarter, top quarter), R = lower half of f
    memcpy(Q, f + 3 * quarter, quarter_bytes);
    memcpy(Q + quarter, f + 3 * quarter, quarter_bytes);
    memcpy(R, f, 2 * quarter_bytes);

    // Fold the third quarter into Q, then the updated Q into the upper half of R
    for (t = 0; t < quarter; ++t) {
        Q[t] ^= f[2 * quarter + t];
        R[quarter + t] ^= Q[t];
    }

    radix(Q0, Q1, Q, m_f - 1);
    radix(R0, R1, R, m_f - 1);

    memcpy(f0, R0, quarter_bytes);
    memcpy(f0 + quarter, Q0, quarter_bytes);
    memcpy(f1, R1, quarter_bytes);
    memcpy(f1 + quarter, Q1, quarter_bytes);
}



/**
 * @brief Evaluates f at all subset sums of a given set
 *
 * This function is a subroutine of the function PQCLEAN_HQCRMRS128_CLEAN_fft.
 * Note that f is modified in place (Step 2 twists its coefficients).
 *
 * @param[out] w Array receiving the 2^m evaluations
 * @param[in,out] f Array of 2^m_f coefficients
 * @param[in] f_coeffs Number of coefficients of f
 * @param[in] m Number of betas
 * @param[in] m_f 2^{m_f} is the size of the array f (see radix)
 * @param[in] betas FFT constants
 */
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
uint16_t gammas[PARAM_M - 2] = {0};
uint16_t deltas[PARAM_M - 2] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
uint16_t u[1 << (PARAM_M - 2)] = {0};
uint16_t v[1 << (PARAM_M - 2)] = {0};
uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

uint16_t beta_m_pow;
size_t i, j, k;
size_t x;

// Step 1
// Base case: f = f[0] + f[1].x, so its value at a subset sum is f[0] plus the matching sum of betas[i] * f[1]
if (m_f == 1) {
for (i = 0; i < m; ++i) {
tmp[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], f[1]);
}

w[0] = f[0];
x = 1;
// Doubling construction of the 2^m outputs
for (j = 0; j < m; ++j) {
for (k = 0; k < x; ++k) {
w[x + k] = w[k] ^ tmp[j];
}
x <<= 1;
}

return;
}

// Step 2: compute g
// f[i] is multiplied by betas[m - 1]^i (skipped when the last beta is 1)
if (betas[m - 1] != 1) {
beta_m_pow = 1;
x = 1;
x <<= m_f;
for (i = 1; i < x; ++i) {
beta_m_pow = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
f[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, f[i]);
}
}

// Step 3
radix(f0, f1, f, m_f);

// Step 4: compute gammas and deltas
// gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
for (i = 0; i + 1 < m; ++i) {
gammas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(betas[m - 1]));
deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(gammas[i]) ^ gammas[i];
}

// Compute gammas sums
compute_subset_sums(gammas_sums, gammas, m - 1);

// Step 5
fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

k = 1;
k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
w[0] = u[0];
w[k] = u[0] ^ f1[0];
for (i = 1; i < k; ++i) {
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], f1[0]);
w[k + i] = w[i] ^ f1[0];
}
} else {
fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

// Step 6
// The upper half of w starts as v and is then combined with the lower half
// (2 * k bytes, i.e. k uint16_t values)
memcpy(w + k, v, 2 * k);
w[0] = u[0];
w[k] ^= u[0];
for (i = 1; i < k; ++i) {
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], v[i]);
w[k + i] ^= w[i];
}
}
}



/**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * The caller's f is not modified here; the local halves f0 and f1 are the ones altered (twisted) by fft_rec.
 *
 * @param[out] w Array of 2^PARAM_M elements receiving the evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
uint16_t betas[PARAM_M - 1] = {0};
uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
uint16_t f0[1 << (PARAM_FFT - 1)] = {0};
uint16_t f1[1 << (PARAM_FFT - 1)] = {0};
uint16_t deltas[PARAM_M - 1] = {0};
uint16_t u[1 << (PARAM_M - 1)] = {0};
uint16_t v[1 << (PARAM_M - 1)] = {0};

size_t i, k;

// Follows Gao and Mateer algorithm
compute_fft_betas(betas);

// Step 1: PARAM_FFT > 1, nothing to do

// Compute gammas sums
compute_subset_sums(betas_sums, betas, PARAM_M - 1);

// Step 2: beta_m = 1, nothing to do

// Step 3
radix(f0, f1, f, PARAM_FFT);

// Step 4: Compute deltas
for (i = 0; i < PARAM_M - 1; ++i) {
deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(betas[i]) ^ betas[i];
}

// Step 5
fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

k = 1 << (PARAM_M - 1);
// Step 6, 7 and error polynomial computation
// (2 * k bytes, i.e. k uint16_t values, go to the upper half of w)
memcpy(w + k, v, 2 * k);

// Check if 0 is root
w[0] = u[0];

// Check if 1 is root
w[k] ^= u[0];

// Find other roots
// w[i] is the evaluation at betas_sums[i]; w[k + i] is the evaluation at betas_sums[i] + 1
for (i = 1; i < k; ++i) {
w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas_sums[i], v[i]);
w[k + i] ^= w[i];
}
}



/**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * Each entry of error is toggled (XOR) when the corresponding evaluation is zero, so error must be
 * zero-initialized by the caller. The test for zero is done arithmetically, without branching.
 *
 * @param[out] error Array with the error
 * @param[in] w Array of size 2^PARAM_M
 */
void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
uint16_t gammas[PARAM_M - 1] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
uint16_t k;
size_t i, index;

// Rebuild the evaluation points in the same order as the FFT output
compute_fft_betas(gammas);
compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

k = 1 << (PARAM_M - 1);
// 1 ^ ((uint16_t) -x >> 15) is 1 when x == 0 and 0 otherwise.
// Both w[0] and w[k] are folded into error[0].
error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15);
error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15);

for (i = 1; i < k; ++i) {
// The position is the logarithm of the inverse of the evaluation point
index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i]];
error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15);

index = PARAM_GF_MUL_ORDER - gf_log[gammas_sums[i] ^ 1];
error[index] ^= 1 ^ ((uint16_t) - w[k + i] >> 15);
}
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-128/clean/fft.h Целия файл

@@ -0,0 +1,18 @@
#ifndef FFT_H
#define FFT_H


/**
 * @file fft.h
 * Header file of fft.c (additive FFT used by the Reed-Solomon decoder)
 */

#include <stddef.h>
#include <stdint.h>

// Evaluates the polynomial f (f_coeffs coefficients) on all field elements; results go to w
void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

// Toggles the entries of error that correspond to zero evaluations in w
void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


#endif

+ 63
- 0
src/kem/hqc/hqc-rmrs-128/clean/gf.c Целия файл

@@ -0,0 +1,63 @@
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
/**
* @file gf.c
* Galois field implementation with multiplication using lookup tables
*/


/**
 * @brief Multiplies two elements of GF(2^PARAM_M)
 *
 * The product is looked up as gf_exp[(gf_log[a] + gf_log[b]) mod (2^PARAM_M - 1)].
 * A zero operand is handled without branching: the looked-up value is masked to 0.
 * Both operands are used as indices into gf_log and must therefore be valid field elements.
 *
 * @returns the product a*b (0 if a or b is 0)
 * @param[in] a First element of GF(2^PARAM_M) to multiply
 * @param[in] b Second element of GF(2^PARAM_M) to multiply
 */
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b) {
    // (0 - x) computed on 32 bits has its top bit set exactly when the 16-bit x is nonzero.
    // Unsigned arithmetic only: right-shifting a negative signed value is implementation-defined.
    uint16_t a_nonzero = (uint16_t) ((uint32_t) 0 - (((uint32_t) 0 - (uint32_t) a) >> 31)); // a != 0
    uint16_t b_nonzero = (uint16_t) ((uint32_t) 0 - (((uint32_t) 0 - (uint32_t) b) >> 31)); // b != 0
    uint16_t mask = a_nonzero & b_nonzero;

    return mask & gf_exp[PQCLEAN_HQCRMRS128_CLEAN_gf_mod(gf_log[a] + gf_log[b])];
}



/**
 * @brief Squares an element of GF(2^PARAM_M)
 *
 * The square is looked up as gf_exp[(2 * gf_log[a]) mod (2^PARAM_M - 1)]; for a == 0 the
 * looked-up value is masked to 0 without branching.
 *
 * @returns a^2
 * @param[in] a Element of GF(2^PARAM_M)
 */
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a) {
    // 0xffff if a != 0 and 0 otherwise. Built with unsigned arithmetic and stored in an
    // unsigned type: a signed right shift and an out-of-range int16_t store are both
    // implementation-defined.
    uint16_t mask = (uint16_t) ((uint32_t) 0 - (((uint32_t) 0 - (uint32_t) a) >> 31)); // a != 0

    return mask & gf_exp[PQCLEAN_HQCRMRS128_CLEAN_gf_mod(2 * gf_log[a])];
}



/**
 * @brief Computes the inverse of an element of GF(2^PARAM_M)
 *
 * The inverse is looked up as gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]]; for a == 0 the
 * looked-up value is masked to 0 without branching.
 *
 * @returns the inverse of a (0 if a is 0)
 * @param[in] a Element of GF(2^PARAM_M)
 */
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a) {
    // 0xffff if a != 0 and 0 otherwise. Built with unsigned arithmetic and stored in an
    // unsigned type: a signed right shift and an out-of-range int16_t store are both
    // implementation-defined.
    uint16_t mask = (uint16_t) ((uint32_t) 0 - (((uint32_t) 0 - (uint32_t) a) >> 31)); // a != 0

    return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]];
}



/**
 * @brief Returns i modulo 2^PARAM_M-1
 *
 * i must be less than 2*(2^PARAM_M-1).
 * Therefore, the return value is either i or i-2^PARAM_M+1.
 * The choice between the two is made with a mask, not a branch.
 *
 * @returns i mod (2^PARAM_M-1)
 * @param[in] i The integer whose modulo is taken
 */
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i) {
    // Subtract unconditionally; the result wraps around when i < PARAM_GF_MUL_ORDER
    uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // 1 when the subtraction wrapped (top bit set), 0 otherwise
    uint16_t borrow = reduced >> 15;

    // PARAM_GF_MUL_ORDER when the subtraction wrapped, 0 otherwise
    uint16_t add_back = (uint16_t) (0 - borrow) & PARAM_GF_MUL_ORDER;

    return reduced + add_back;
}

+ 39
- 0
src/kem/hqc/hqc-rmrs-128/clean/gf.h Целия файл

@@ -0,0 +1,39 @@
#ifndef GF_H
#define GF_H


/**
* @file gf.h
* Header file of gf.c
*/

#include <stddef.h>
#include <stdint.h>


/**
* Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
* The last two elements are needed by the PQCLEAN_HQCRMRS128_CLEAN_gf_mul function
* (for example if both elements to multiply are zero).
*/
static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



/**
* Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
* The logarithm of 0 is set to 0 by convention.
*/
static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };


// Product of a and b; returns 0 when either operand is 0
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mul(uint16_t a, uint16_t b);

// Square of a
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_square(uint16_t a);

// Inverse of a; returns 0 when a is 0
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(uint16_t a);

// i mod PARAM_GF_MUL_ORDER, for i below 2 * PARAM_GF_MUL_ORDER
uint16_t PQCLEAN_HQCRMRS128_CLEAN_gf_mod(uint16_t i);


#endif

+ 154
- 0
src/kem/hqc/hqc-rmrs-128/clean/gf2x.c Целия файл

@@ -0,0 +1,154 @@
#include "gf2x.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include <stdint.h>
/**
* \file gf2x.c
* \brief Implementation of multiplication of two polynomials
*/


// Forward declarations of the file-local helpers
static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
static void reduce(uint64_t *o, const uint64_t *a);
static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

/**
 * @brief swap two elements in a table
 *
 * This function exchanges tab[elt1] with tab[elt2].
 * Passing the same index twice leaves the table unchanged.
 *
 * @param[in,out] tab Pointer to the table
 * @param[in] elt1 Index of the first element
 * @param[in] elt2 Index of the second element
 */
static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    uint16_t *first = tab + elt1;
    uint16_t *second = tab + elt2;
    const uint16_t held = *first;

    *first = *second;
    *second = held;
}



/**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * This function computes the modular reduction of the polynomial a(x): the bits of a located
 * at positions PARAM_N and above are shifted down by PARAM_N and XORed onto the low part.
 *
 * @param[in] a Pointer to the polynomial a(x); words 0 to 2 * VEC_N_SIZE_64 - 1 are read
 * @param[out] o Pointer to the result (VEC_N_SIZE_64 words)
 */
static void reduce(uint64_t *o, const uint64_t *a) {
    // Bit offset of X^PARAM_N inside its 64-bit word, and the complementary shift
    const unsigned low_shift = PARAM_N & 63;
    const unsigned high_shift = 64 - low_shift;
    // First word that contains bits of degree >= PARAM_N
    const uint64_t *upper = a + (VEC_N_SIZE_64 - 1);
    size_t w;

    for (w = 0; w < VEC_N_SIZE_64; w++) {
        const uint64_t from_here = upper[w] >> low_shift;
        const uint64_t from_next = (uint64_t) (upper[w + 1] << high_shift);

        o[w] = a[w] ^ from_here ^ from_next;
    }

    // Clear the bits of the last word that lie beyond degree PARAM_N - 1
    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
}



/**
 * @brief Computes the (unreduced) product of a sparse polynomial a1 with a dense polynomial a2
 *
 * For every monomial degree listed in <b>a1</b>, a copy of a2(x) shifted by that
 * degree is XORed into <b>o</b>. The result is accumulated with XOR, so <b>o</b>
 * must already hold the desired initial value (vect_mul passes a zeroed buffer).
 * Both the layout of the shift table and the order in which the monomials are
 * processed are shuffled with bytes drawn from <b>ctx</b>.
 *
 * @param[out] o Pointer to the result, addressed as a byte array
 * @param[in] a1 Pointer to the sparse polynomial (list of the degrees of its monomials)
 * @param[in] a2 Pointer to the dense polynomial (VEC_N_SIZE_64 words)
 * @param[in] weight Hamming weight of the sparse polynomial a1 (number of entries read from a1);
 *                   the local permutation arrays hold PARAM_OMEGA_E entries
 * @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
 */
static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
// table holds 16 rows of VEC_N_SIZE_64 + 1 words: a2 shifted left by 0..15 bits
uint64_t carry;
uint32_t dec, s;
uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
uint16_t permuted_table[16];
uint16_t permutation_table[16];
uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t tmp;
uint64_t *pt;
uint8_t *res;
size_t i, j;

// start from the identity mapping shift -> row
for (i = 0; i < 16; i++) {
permuted_table[i] = (uint16_t) i;
}

seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

// shuffle the mapping: entry i is exchanged with one of the entries i .. 15
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}

// row for shift 0 is a plain copy of a2 with a zero overflow word
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;

// rows for shifts 1..15: shift a2 left by i bits, carrying the bits
// shifted out of each word into the next one
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}

// identity order over the monomials of a1 ...
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = (uint16_t) i;
}

seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

// ... shuffled the same way as the table mapping above
for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i)));
}

for (i = 0; i < weight; i++) {
// split the degree into a bit shift (low 4 bits) and a 16-bit-unit offset
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
// 2 * s bytes == 16 * s bits, so the total shift is 16 * s + dec
res = o + 2 * s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

// XOR the pre-shifted row into the output, 8 bytes at a time
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
tmp = PQCLEAN_HQCRMRS128_CLEAN_load8(res);
PQCLEAN_HQCRMRS128_CLEAN_store8(res, tmp ^ pt[j]);
res += 8;
}
}
}



/**
 * @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
 *
 * Multiplies the sparse polynomial <b>a1</b> (given as <b>weight</b> monomial degrees)
 * by the dense polynomial <b>a2</b>. The unreduced product is built in a zeroed
 * scratch buffer, converted from bytes to 64-bit words, and then reduced
 * modulo \f$ X^n - 1\f$ into <b>o</b>.
 *
 * @param[out] o Pointer to the result
 * @param[in] a1 Pointer to the sparse polynomial
 * @param[in] a2 Pointer to the dense polynomial
 * @param[in] weight Integer that is the weight of the sparse polynomial
 * @param[in] ctx Pointer to the randomness context
 */
void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
    uint64_t product[2 * VEC_N_SIZE_64 + 1] = {0};
    uint8_t *product_bytes = (uint8_t *) product;

    fast_convolution_mult(product_bytes, a1, a2, weight, ctx);
    // the convolution works on bytes; turn the buffer back into native words in place
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(product, 2 * VEC_N_SIZE_64 + 1, product_bytes, sizeof(product));
    reduce(o, product);
}

+ 16
- 0
src/kem/hqc/hqc-rmrs-128/clean/gf2x.h Целия файл

@@ -0,0 +1,16 @@
#ifndef GF2X_H
#define GF2X_H


/**
 * @file gf2x.h
 * @brief Header file for gf2x.c
 */
#include "nistseedexpander.h"
#include "randombytes.h"
#include <stdint.h>

/*
 * o = a1 * a2 mod X^n - 1, where a1 is a sparse polynomial given as a list of
 * `weight` monomial degrees and a2 is a dense polynomial stored in 64-bit words.
 * ctx supplies the bytes used to randomize the multiplication.
 */
void PQCLEAN_HQCRMRS128_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);


#endif

+ 144
- 0
src/kem/hqc/hqc-rmrs-128/clean/hqc.c Целия файл

@@ -0,0 +1,144 @@
#include "code.h"
#include "gf2x.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
/**
* @file hqc.c
* @brief Implementation of hqc.h
*/



/**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * Two fresh seeds are drawn with randombytes. The secret seed expands to the
 * vectors <b>x</b> and <b>y</b>; the public seed expands to the vector <b>h</b>.
 * The syndrome is s = x + y.h.
 *
 * The public key is the public seed followed by <b>s</b>. The secret key is the
 * secret seed followed by a copy of the public key (to respect the NIST API).
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    uint8_t secret_seed[SEED_BYTES] = {0};
    uint8_t public_seed[SEED_BYTES] = {0};
    AES_XOF_struct secret_xof;
    AES_XOF_struct public_xof;
    uint64_t x[VEC_N_SIZE_64] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};

    // Seed expander for the secret key material (secret seed is drawn first)
    randombytes(secret_seed, SEED_BYTES);
    seedexpander_init(&secret_xof, secret_seed, secret_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Seed expander for the public key material
    randombytes(public_seed, SEED_BYTES);
    seedexpander_init(&public_xof, public_seed, public_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors: x as a bit vector, y as a list of coordinates
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&secret_xof, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&secret_xof, y, PARAM_OMEGA);

    // Public vector h and syndrome s = x + y.h
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&public_xof, h);
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(s, y, h, PARAM_OMEGA, &secret_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(s, x, s, VEC_N_SIZE_64);

    // Serialize: pk first, because sk embeds pk
    PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(pk, public_seed, s);
    PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(sk, secret_seed, pk);
}



/**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> and <b>v</b>:
 * u = r1 + r2.h and v = m.G + s.r2 + e, where r1, r2 and e are derived from
 * <b>theta</b> and h, s come from the public key.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_xof;
    uint64_t h[VEC_N_SIZE_64] = {0};
    uint64_t s[VEC_N_SIZE_64] = {0};
    uint64_t r1[VEC_N_SIZE_64] = {0};
    uint32_t r2[PARAM_OMEGA_R] = {0};
    uint64_t e[VEC_N_SIZE_64] = {0};
    uint64_t encoded[VEC_N_SIZE_64] = {0};
    uint64_t masked[VEC_N_SIZE_64] = {0};

    // All encryption randomness comes from a seed expander keyed by theta
    seedexpander_init(&theta_xof, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // h and s from the public key
    PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(h, s, pk);

    // r1, r2 and e, drawn in this order from the expander
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&theta_xof, r1, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&theta_xof, r2, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&theta_xof, e, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &theta_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64);

    // m.G: encode into v as bytes, convert to words in place, widen to PARAM_N bits
    PQCLEAN_HQCRMRS128_CLEAN_code_encode((uint8_t *)v, m);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(encoded, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, truncated back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(masked, r2, s, PARAM_OMEGA_R, &theta_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(masked, e, masked, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(masked, encoded, masked, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(v, PARAM_N1N2, masked, PARAM_N);
}



/**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Recomputes y from the secret key, forms v - u.y and decodes it into the
 * message. A fresh random seed (from randombytes) keys the seed expander that
 * randomizes the multiplication.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    uint32_t y[PARAM_OMEGA] = {0};
    uint64_t scratch[VEC_N_SIZE_64] = {0};
    uint64_t noisy_codeword[VEC_N_SIZE_64] = {0};
    uint8_t mul_seed[SEED_BYTES] = {0};
    AES_XOF_struct mul_xof;

    // Retrieve x, y, pk from secret key (x lands in scratch and is overwritten below)
    PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(scratch, y, pk, sk);

    randombytes(mul_seed, SEED_BYTES);
    seedexpander_init(&mul_xof, mul_seed, mul_seed + 32, SEEDEXPANDER_MAX_LENGTH);

    // Compute v - u.y
    PQCLEAN_HQCRMRS128_CLEAN_vect_resize(scratch, PARAM_N, v, PARAM_N1N2);
    PQCLEAN_HQCRMRS128_CLEAN_vect_mul(noisy_codeword, y, u, PARAM_OMEGA, &mul_xof);
    PQCLEAN_HQCRMRS128_CLEAN_vect_add(noisy_codeword, scratch, noisy_codeword, VEC_N_SIZE_64);

    // Compute m by decoding v - u.y (the decoder works on bytes; scratch is reused as the byte buffer)
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr((uint8_t *)scratch, VEC_N_SIZE_BYTES, noisy_codeword, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_code_decode(m, (uint8_t *)scratch);
}

+ 19
- 0
src/kem/hqc/hqc-rmrs-128/clean/hqc.h Целия файл

@@ -0,0 +1,19 @@
#ifndef HQC_H
#define HQC_H


/**
 * @file hqc.h
 * @brief Functions of the HQC_PKE IND_CPA scheme
 */

#include <stdint.h>

/* Generates a key pair: pk receives the public key, sk the secret key (which embeds pk). */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

/* Encrypts message m under pk into the ciphertext vectors u and v; theta seeds the encryption randomness. */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

/* Decrypts the ciphertext vectors u and v with sk and writes the decoded message to m. */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


#endif

+ 140
- 0
src/kem/hqc/hqc-rmrs-128/clean/kem.c Целия файл

@@ -0,0 +1,140 @@
#include "api.h"
#include "fips202.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "sha2.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file kem.c
* @brief Implementation of api.h
*/



/**
 * @brief Keygen of the HQC_KEM IND-CCA2 scheme
 *
 * Thin wrapper around the HQC_PKE key generation.
 *
 * The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 * @returns 0 (always; the underlying keygen reports no errors)
 */
int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_keygen(pk, sk);
return 0;
}



/**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * Draws a random message m, derives theta = sha3_512(m), encrypts m under
 * <b>pk</b> with randomness theta to get (u, v), and computes d = sha512(m).
 * The shared secret is sha512(m || u || v) and the ciphertext is (u, v, d).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful
 */
int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    // hash input for the shared secret: m || u || v
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *const mc_u = mc + VEC_K_SIZE_BYTES;
    unsigned char *const mc_v = mc_u + VEC_N_SIZE_BYTES;

    // Random message and the encryption randomness derived from it
    randombytes(m, VEC_K_SIZE_BYTES);
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Ciphertext vectors
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u, v, m, theta, pk);

    // Message hash d
    sha512(d, m, VEC_K_SIZE_BYTES);

    // Shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc_u, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc_v, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, sizeof(mc));

    // Serialized ciphertext
    PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(ct, u, v, d);

    return 0;
}



/**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * Decrypts the ciphertext to a candidate message m, re-encrypts m with
 * theta = sha3_512(m) and recomputes d' = sha512(m). The shared secret
 * sha512(m || u || v) is always computed; if the re-encryption (u', v') or d'
 * differs from the received values, the shared secret is overwritten with
 * zeros and -1 is returned.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
int PQCLEAN_HQCRMRS128_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    uint8_t result;
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    uint64_t u2[VEC_N_SIZE_64] = {0};
    uint64_t v2[VEC_N1N2_SIZE_64] = {0};
    unsigned char d2[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(u, v, d, ct);

    // Retrieving pk from sk
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_decrypt(m, u, v, sk);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    PQCLEAN_HQCRMRS128_CLEAN_hqc_pke_encrypt(u2, v2, m, theta, pk);

    // Computing d'
    sha512(d2, m, VEC_K_SIZE_BYTES);

    // Computing shared secret
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    result = PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u2, VEC_N_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v2, VEC_N1N2_SIZE_BYTES);
    result |= PQCLEAN_HQCRMRS128_CLEAN_vect_compare(d, d2, SHA512_BYTES);

    // Branch-free expansion of "result != 0" into an all-ones / all-zeros byte.
    // Done in unsigned arithmetic: for any non-zero result, 0 - result wraps to
    // a value with bit 31 set. (Right-shifting a negative signed value, as the
    // previous formulation did, is implementation-defined.)
    result = (uint8_t) (0u - (((uint32_t) 0 - (uint32_t) result) >> 31));

    // Wipe the shared secret on failure, leave it untouched on success
    for (size_t i = 0; i < SHARED_SECRET_BYTES; i++) {
        ss[i] &= (unsigned char) ~result;
    }


    return -(result & 1);
}

+ 98
- 0
src/kem/hqc/hqc-rmrs-128/clean/parameters.h Целия файл

@@ -0,0 +1,98 @@
#ifndef HQC_PARAMETERS_H
#define HQC_PARAMETERS_H


/**
* @file parameters.h
* @brief Parameters of the HQC_KEM IND-CCA2 scheme
*/
#include "api.h"


#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/

/*
#define PARAM_N Define the parameter n of the scheme
#define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code)
#define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
#define PARAM_N1N2 Define the length in bits of the Concatenated code
#define PARAM_OMEGA Define the parameter omega of the scheme
#define PARAM_OMEGA_E Define the parameter omega_e of the scheme
#define PARAM_OMEGA_R Define the parameter omega_r of the scheme
#define PARAM_SECURITY Define the security level corresponding to the chosen parameters
#define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters

#define SECRET_KEY_BYTES Define the size of the secret key in bytes
#define PUBLIC_KEY_BYTES Define the size of the public key in bytes
#define SHARED_SECRET_BYTES Define the size of the shared secret in bytes
#define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes

#define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
#define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes
#define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes
#define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes
#define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

#define VEC_N_SIZE_64 Define the size of the array used to store a PARAM_N sized vector in 64 bits
#define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits
#define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
#define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

#define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
#define PARAM_M Define a positive integer
#define PARAM_GF_POLY Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
#define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1
#define PARAM_K Define the size of the information bits of the Reed-Solomon code
#define PARAM_G Define the size of the generator polynomial of Reed-Solomon code
#define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input
We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA.
Example for PARAM_DELTA=24: the smallest power of 2 greater than 24+1 is 32=2^5.
NOTE(review): this parameter set defines PARAM_DELTA as 15 and PARAM_FFT as 5 below,
so the example does not describe the values in this file -- confirm.
#define RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code

#define RED_MASK A mask for the higher bits of a vector
#define SHA512_BYTES Define the size of SHA512 output in bytes
#define SEED_BYTES Define the size of the seed in bytes
#define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length
*/

#define PARAM_N 17669
#define PARAM_N1 46
#define PARAM_N2 384
#define PARAM_N1N2 17664
#define PARAM_OMEGA 66
#define PARAM_OMEGA_E 75
#define PARAM_OMEGA_R 75
#define PARAM_SECURITY 128
#define PARAM_DFR_EXP 128

#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_SECRETKEYBYTES
#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_PUBLICKEYBYTES
#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_BYTES
#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS128_CLEAN_CRYPTO_CIPHERTEXTBYTES

#define UTILS_REJECTION_THRESHOLD 16767881
#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8)
#define VEC_K_SIZE_BYTES PARAM_K
#define VEC_N1_SIZE_BYTES PARAM_N1
#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8)

#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64)
#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8)
#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8)
#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64)

#define PARAM_DELTA 15
#define PARAM_M 8
#define PARAM_GF_POLY 0x11D
#define PARAM_GF_MUL_ORDER 255
#define PARAM_K 16
#define PARAM_G 31
#define PARAM_FFT 5
#define RS_POLY_COEFS 89,69,153,116,176,117,111,75,73,233,242,233,65,210,21,139,103,173,67,118,105,210,174,110,74,69,228,82,255,181,1

#define RED_MASK 0x1f
#define SHA512_BYTES 64
#define SEED_BYTES 40
#define SEEDEXPANDER_MAX_LENGTH 4294967295

#endif

+ 186
- 0
src/kem/hqc/hqc-rmrs-128/clean/parsing.c Целия файл

@@ -0,0 +1,186 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file parsing.c
* @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
*/


/**
 * @brief Store a 64-bit value as 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer (8 bytes are written)
 * @param[in] in Value to store
 */
void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in) {
    for (unsigned int byte = 0; byte < 8; byte++) {
        out[byte] = (unsigned char) ((in >> (8 * byte)) & 0xFF);
    }
}


/**
 * @brief Load 8 bytes as a 64-bit value, least significant byte first
 *
 * @param[in] in Source buffer (8 bytes are read)
 * @returns The value whose byte k (counting from the least significant) is in[k]
 */
uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in) {
    uint64_t value = 0;

    for (unsigned int byte = 0; byte < 8; byte++) {
        value |= ((uint64_t) in[byte]) << (8 * byte);
    }

    return value;
}

/**
 * @brief Convert a byte array into an array of 64-bit words (least significant byte first)
 *
 * Words are produced until either <b>outlen</b> words have been written or all
 * <b>inlen</b> bytes have been consumed. When fewer than 8 bytes remain for the
 * last word, the missing high bytes are taken as zero. Output words beyond the
 * last one produced are left untouched.
 *
 * Each word is assembled in a local variable before it is stored, so the
 * conversion is also correct when <b>out64</b> and <b>in8</b> refer to the same
 * buffer (as callers in this code base do), including the partial last word.
 *
 * @param[out] out64 Destination array of 64-bit words
 * @param[in] outlen Capacity of out64 in words
 * @param[in] in8 Source byte array
 * @param[in] inlen Number of bytes available in in8
 */
void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t index_in = 0;

    for (size_t index_out = 0; index_out < outlen && index_in < inlen; index_out++) {
        // a full word takes 8 bytes; the last word may take 1..7
        size_t take = inlen - index_in;
        if (take > 8) {
            take = 8;
        }

        // read every input byte of this word before writing the output word
        uint64_t word = 0;
        for (size_t k = take; k-- > 0;) {
            word = (word << 8) | in8[index_in + k];
        }

        out64[index_out] = word;
        index_in += take;
    }
}

/**
 * @brief Convert an array of 64-bit words into a byte array (least significant byte first)
 *
 * Bytes are written until either <b>outlen</b> bytes have been produced or all
 * <b>inlen</b> words have been consumed; byte p comes from word p / 8.
 *
 * @param[out] out8 Destination byte array
 * @param[in] outlen Capacity of out8 in bytes
 * @param[in] in64 Source array of 64-bit words
 * @param[in] inlen Number of words available in in64
 */
void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t pos = 0; pos < outlen && pos / 8 < inlen; pos++) {
        const uint64_t word = in64[pos / 8];
        const unsigned int shift = (unsigned int) (pos % 8) * 8;

        out8[pos] = (uint8_t) ((word >> shift) & 0xFF);
    }
}


/**
 * @brief Serialize a secret key
 *
 * Layout of <b>sk</b>: SEED_BYTES bytes of the secret seed (from which vectors
 * <b>x</b> and <b>y</b> are regenerated), followed by PUBLIC_KEY_BYTES bytes of
 * the public key, appended in order to respect the NIST API.
 *
 * @param[out] sk String containing the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *const pk_part = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_part, pk, PUBLIC_KEY_BYTES);
}

/**
 * @brief Deserialize a secret key
 *
 * Copies the embedded public key out of <b>sk</b> and regenerates the vectors
 * <b>x</b> and <b>y</b> (both of weight PARAM_OMEGA) from the secret seed stored
 * in the first SEED_BYTES bytes of <b>sk</b>.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint32_t representation of vector y (list of coordinates)
 * @param[out] pk String containing the public key
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct xof;

    // split sk into seed || pk
    memcpy(seed, sk, SEED_BYTES);
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // regenerate x then y, in the same order as key generation
    seedexpander_init(&xof, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(&xof, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(&xof, y, PARAM_OMEGA);
}

/**
 * @brief Serialize a public key
 *
 * Layout of <b>pk</b>: SEED_BYTES bytes of the seed used to generate the vector
 * <b>h</b>, followed by VEC_N_SIZE_BYTES bytes of the syndrome <b>s</b>.
 *
 * @param[out] pk String containing the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *const s_part = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(s_part, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
}



/**
 * @brief Deserialize a public key
 *
 * Reads the syndrome <b>s</b> from the bytes following the seed and regenerates
 * the vector <b>h</b> from the seed stored in the first SEED_BYTES bytes of <b>pk</b>.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    uint8_t seed[SEED_BYTES] = {0};
    AES_XOF_struct xof;

    // split pk into seed || s
    memcpy(seed, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(s, VEC_N_SIZE_64, pk + SEED_BYTES, VEC_N_SIZE_BYTES);

    // h is never stored; it is re-expanded from the seed
    seedexpander_init(&xof, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(&xof, h);
}


/**
 * @brief Serialize a ciphertext
 *
 * Layout of <b>ct</b>: VEC_N_SIZE_BYTES bytes of <b>u</b>, then
 * VEC_N1N2_SIZE_BYTES bytes of <b>v</b>, then SHA512_BYTES bytes of the hash <b>d</b>.
 *
 * @param[out] ct String containing the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *const v_part = ct + VEC_N_SIZE_BYTES;
    uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS128_CLEAN_store8_arr(v_part, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_part, d, SHA512_BYTES);
}


/**
 * @brief Deserialize a ciphertext
 *
 * Reads, in order, VEC_N_SIZE_BYTES bytes of <b>u</b>, VEC_N1N2_SIZE_BYTES bytes
 * of <b>v</b> and SHA512_BYTES bytes of the hash <b>d</b> from <b>ct</b>.
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String containing the hash d
 * @param[in] ct String containing the ciphertext
 */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *const v_part = ct + VEC_N_SIZE_BYTES;
    const uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, v_part, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_part, SHA512_BYTES);
}

+ 36
- 0
src/kem/hqc/hqc-rmrs-128/clean/parsing.h Целия файл

@@ -0,0 +1,36 @@
#ifndef PARSING_H
#define PARSING_H


/**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

#include <stddef.h> /* size_t, used by the *_arr prototypes below */
#include <stdint.h>

/* Store a 64-bit value as 8 bytes, least significant byte first. */
void PQCLEAN_HQCRMRS128_CLEAN_store8(unsigned char *out, uint64_t in);

/* Load 8 bytes (least significant byte first) as a 64-bit value. */
uint64_t PQCLEAN_HQCRMRS128_CLEAN_load8(const unsigned char *in);

/* Convert up to inlen bytes into at most outlen 64-bit words. */
void PQCLEAN_HQCRMRS128_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

/* Convert up to inlen 64-bit words into at most outlen bytes. */
void PQCLEAN_HQCRMRS128_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


/* sk = secret seed || pk */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

/* Regenerate x and y from the seed in sk and copy out the embedded pk. */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk);


/* pk = public seed || s */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

/* Read s from pk and regenerate h from the seed in pk. */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


/* ct = u || v || d */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

/* Split ct into u, v and d. */
void PQCLEAN_HQCRMRS128_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


#endif

+ 237
- 0
src/kem/hqc/hqc-rmrs-128/clean/reed_muller.c Целия файл

@@ -0,0 +1,237 @@
#include "parameters.h"
#include "reed_muller.h"
#include <stdint.h>
#include <string.h>
/**
* @file reed_muller.c
* Constant time implementation of Reed-Muller code RM(1,7)
*/



// number of repeated code words
#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128)

// copy bit 0 into all bits of a 32 bit value
#define BIT0MASK(x) (-((x) & 1))


static void encode(uint8_t *word, uint8_t message);
static void hadamard(uint16_t src[128], uint16_t dst[128]);
static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]);
static uint8_t find_peaks(const uint16_t transform[128]);



/**
 * @brief Encode a single byte into a single codeword using RM(1,7)
 *
 * Encoding matrix of this code:
 * bit pattern (note that bits are numbered big endian)
 * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1 cccccccc cccccccc cccccccc cccccccc
 * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4 ffff0000 ffff0000 ffff0000 ffff0000
 * 5 ffffffff 00000000 ffffffff 00000000
 * 6 ffffffff ffffffff 00000000 00000000
 * 7 ffffffff ffffffff ffffffff ffffffff
 *
 * The 128-bit codeword is written as four 32-bit quarters, each stored least
 * significant byte first.
 *
 * @param[out] word An RM(1,7) codeword (16 bytes are written)
 * @param[in] message A message
 */
static void encode(uint8_t *word, uint8_t message) {
    // rows 0..4 of the matrix are identical in all four quarters
    // (Warning: in the bit matrix above, low bits are at the left!)
    static const uint32_t row_pattern[5] = {
        0xaaaaaaaa, 0xcccccccc, 0xf0f0f0f0, 0xff00ff00, 0xffff0000
    };

    // bit 7 flips every bit of the codeword
    uint32_t base = BIT0MASK(message >> 7);
    for (size_t row = 0; row < 5; row++) {
        base ^= BIT0MASK(message >> row) & row_pattern[row];
    }

    // bit 5 inverts quarters 1 and 3; bit 6 inverts quarters 2 and 3
    const uint32_t flip5 = BIT0MASK(message >> 5);
    const uint32_t flip6 = BIT0MASK(message >> 6);
    const uint32_t quarter[4] = {
        base,
        base ^ flip5,
        base ^ flip6,
        base ^ flip5 ^ flip6
    };

    for (size_t q = 0; q < 4; q++) {
        for (size_t byte = 0; byte < 4; byte++) {
            word[4 * q + byte] = (quarter[q] >> (8 * byte)) & 0xff;
        }
    }
}



/**
 * @brief Hadamard transform
 *
 * Perform hadamard transform of src and store result in dst
 * src is overwritten: it is also used as intermediate buffer
 * Method is best explained if we use H(3) instead of H(7):
 *
 * The routine multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * You can do this in three passes, where each pass sets the lower half of the
 * output buffer to the pairwise sums and the upper half to the pairwise
 * differences of the input buffer:
 *   input:  a, b, c, d, e, f, g, h
 *   pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h
 *   pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * and so on. This order of computation is chosen because it vectorises well.
 * Likewise, this routine multiplies by H(7) in seven passes.
 * Arithmetic is done on uint16_t, so negative values appear in two's complement.
 *
 * @param[in,out] src Input values; used as scratch space and overwritten
 * @param[out] dst Receives the transform
 */
static void hadamard(uint16_t src[128], uint16_t dst[128]) {
    // even passes read src and write dst, odd passes go the other way;
    // the seventh pass (index 6) therefore leaves the result in dst
    for (uint32_t pass = 0; pass < 7; pass++) {
        const uint16_t *in = (pass & 1) ? dst : src;
        uint16_t *out = (pass & 1) ? src : dst;

        for (uint32_t pair = 0; pair < 64; pair++) {
            const uint16_t even = in[2 * pair];
            const uint16_t odd = in[2 * pair + 1];

            out[pair] = (uint16_t) (even + odd);
            out[pair + 64] = (uint16_t) (even - odd);
        }
    }
}



/**
 * @brief Add multiple codewords into expanded codeword
 *
 * Entry p of <b>dest</b> receives the number of copies (out of MULTIPLICITY)
 * in which bit p of the 128-bit codeword is set.
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is 64 too high
 *
 * @param[out] dest Structure that contain the expanded codeword
 * @param[in] src Structure that contain the codeword
 */
static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) {
    // the first copy initialises dest
    for (size_t pos = 0; pos < 128; pos++) {
        dest[pos] = (uint16_t) ((src[pos / 8] >> (pos % 8)) & 1);
    }

    // every further copy is accumulated on top
    for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
        const uint8_t *codeword = src + 16 * copy;

        for (size_t pos = 0; pos < 128; pos++) {
            dest[pos] += (uint16_t) ((codeword[pos / 8] >> (pos % 8)) & 1);
        }
    }
}



/**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the entry
 * with the largest absolute value, and add 128 if that peak is not negative.
 * If there are several peaks of identical absolute value, the one with the
 * smallest index is taken. Entries are interpreted as 16-bit two's complement
 * values. The scan is branch-free: selections are done with bit masks.
 *
 * @param[in] transform Structure that contain the expanded codeword
 * @returns The index of the peak in the low 7 bits; bit 7 is set when the peak is not negative
 */
static uint8_t find_peaks(const uint16_t transform[128]) {
    uint16_t best_magnitude = 0;
    uint16_t best_value = 0;
    uint16_t best_index = 0;

    for (uint16_t idx = 0; idx < 128; idx++) {
        const uint16_t value = transform[idx];
        // 0xFFFF when the sign bit is set, 0 otherwise
        const uint16_t sign = (uint16_t) (0 - (value >> 15));
        // two's complement negation when negative: (value ^ sign) - sign
        const uint16_t magnitude = (uint16_t) ((value ^ sign) - sign);
        // 0xFFFF when magnitude is strictly larger than the best so far
        const uint16_t take = (uint16_t) (0 - (((uint16_t) (best_magnitude - magnitude)) >> 15));

        best_value = (uint16_t) ((best_value & (uint16_t) ~take) | (value & take));
        best_index = (uint16_t) ((best_index & (uint16_t) ~take) | (idx & take));
        best_magnitude = (uint16_t) ((best_magnitude & (uint16_t) ~take) | (magnitude & take));
    }

    // (sign bit) - 1 is all ones for a non-negative peak, 0 for a negative one
    best_index |= 128 & ((best_value >> 15) - 1);
    return (uint8_t) best_index;
}




/**
 * @brief Encodes the received word
 *
 * The message consists of VEC_N1_SIZE_BYTES bytes; each byte is encoded into
 * MULTIPLICITY consecutive identical 128-bit (16-byte) RM(1,7) codewords.
 *
 * @param[out] cdw Byte array receiving the encoded message (16 * MULTIPLICITY bytes per message byte)
 * @param[in] msg Byte array of size VEC_N1_SIZE_BYTES storing the message
 */
void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        uint8_t *const first = &cdw[16 * i * MULTIPLICITY];

        // encode once ...
        encode(first, msg[i]);

        // ... then replicate the codeword into the remaining slots
        for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
            memcpy(first + 16 * copy, first, 16);
        }
    }
}



/**
 * @brief Decodes a received word of the duplicated Reed-Muller code
 *
 * For every message byte the MULTIPLICITY received copies are summed bitwise,
 * passed through the fast Hadamard transform and the location of the peak is
 * taken as the decoded byte. For background on Reed-Muller decoding see
 * MacWilliams and Sloane, The Theory of Error-Correcting Codes
 * @cite macwilliams1977theory
 *
 * @param[out] msg Array receiving VEC_N1_SIZE_BYTES decoded bytes
 * @param[in] cdw Received word, 16 * MULTIPLICITY bytes per message byte
 */
void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    uint16_t sums[128];
    uint16_t spectrum[128];
    const uint8_t *block = cdw;

    for (size_t byte = 0; byte < VEC_N1_SIZE_BYTES; byte++) {
        // per-bit vote counts over all copies of this codeword
        expand_and_sum(sums, block);
        hadamard(sums, spectrum);
        // the sums use 0/1 instead of -1/+1, which leaves a constant offset
        // in the first coefficient only; remove it
        spectrum[0] -= 64 * MULTIPLICITY;
        msg[byte] = find_peaks(spectrum);

        block += 16 * MULTIPLICITY;
    }
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-128/clean/reed_muller.h Целия файл

@@ -0,0 +1,18 @@
#ifndef REED_MULLER_H
#define REED_MULLER_H


/**
* @file reed_muller.h
* Header file of reed_muller.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

/**
* Encodes every byte of msg into a 16-byte codeword that is written
* MULTIPLICITY times in a row into cdw.
*/
void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

/**
* Recovers one byte of msg from each group of MULTIPLICITY 16-byte codewords in cdw.
*/
void PQCLEAN_HQCRMRS128_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


#endif

+ 349
- 0
src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.c Целия файл

@@ -0,0 +1,349 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include "parsing.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
* @file reed_solomon.c
* Constant time implementation of Reed-Solomon codes
*/


// File-local decoding stages, declared in the order in which
// PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode calls them.
static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
static void compute_roots(uint8_t *error, uint16_t *sigma);
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

/**
 * @brief Systematic Reed-Solomon encoding of a PARAM_K byte message
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes), the parity symbols
 * are produced by a (PARAM_N1 - PARAM_K)-stage feedback shift register whose
 * taps are the coefficients RS_POLY_COEFS of the generator polynomial.
 * The message bytes are fed in from the last one to the first one; on return
 * the parity occupies cdw[0 .. PARAM_N1 - PARAM_K - 1] and the unchanged
 * message follows it.
 *
 * @param[out] cdw Array of PARAM_N1 bytes receiving the codeword
 * @param[in] msg Array of PARAM_K bytes storing the message
 */
void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    uint16_t generator[] = {RS_POLY_COEFS};
    uint16_t feedback[PARAM_G] = {0};

    memset(cdw, 0, PARAM_N1);

    for (size_t step = 0; step < PARAM_K; ++step) {
        // symbol entering the register: next message byte plus the top stage
        const uint8_t gate = (uint8_t) (msg[PARAM_K - 1 - step] ^ cdw[PARAM_N1 - PARAM_K - 1]);

        for (size_t c = 0; c < PARAM_G; ++c) {
            feedback[c] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gate, generator[c]);
        }

        // shift the register by one stage while adding the feedback terms
        uint8_t carry = 0;
        for (size_t pos = 0; pos < PARAM_N1 - PARAM_K; pos++) {
            const uint8_t shifted_out = cdw[pos];
            cdw[pos] = (uint8_t) (carry ^ feedback[pos]);
            carry = shifted_out;
        }
    }

    // systematic part: the message itself sits behind the parity symbols
    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
}



/**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Syndrome i is cdw[0] plus the sum over j >= 1 of cdw[j] * alpha_ij_pow[i][j - 1]
 * (the table presumably holds the required powers of the primitive element;
 * it is defined outside this file).
 * Each entry of syndromes is overwritten, so the result does not depend on
 * the previous content of the output array.
 *
 * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) {
        // term j = 0: its table factor is 1, hence no multiplication
        uint16_t acc = cdw[0];
        for (size_t j = 1; j < PARAM_N1; ++j) {
            acc ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]);
        }
        syndromes[i] = acc;
    }
}



/**
* @brief Computes the error locator polynomial (ELP) sigma
*
* This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes). <br>
* We use the letter p for rho which is initialized at -1. <br>
* The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
* Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
* sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
* We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
* Coefficients sigma[1] .. sigma[PARAM_DELTA] are updated with XOR, so they must be
* zero on entry; sigma[0] is set to 1 here.
* Only the first PARAM_DELTA coefficients of sigma are saved in sigma_copy, because
* sigma_copy[PARAM_DELTA] is never read when X_sigma_p is rebuilt.
*
* All data-dependent decisions are taken through the 0/0xffff masks mask1, mask2
* and mask12 instead of branches; the loop bounds depend only on mu and PARAM_DELTA.
*
* @returns the degree of the ELP sigma
* @param[in,out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP
* @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
*/
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
uint16_t deg_sigma = 0;
uint16_t deg_sigma_p = 0;
uint16_t deg_sigma_copy = 0;
uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
uint16_t pp = (uint16_t) -1; // 2*rho
uint16_t d_p = 1;
uint16_t d = syndromes[0];

uint16_t mask1, mask2, mask12;
uint16_t deg_X, deg_X_sigma_p;
uint16_t dd;
uint16_t mu;

uint16_t i;

sigma[0] = 1;
for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
// Save sigma in case we need it to update X_sigma_p
// (2 * PARAM_DELTA bytes, i.e. PARAM_DELTA 16-bit coefficients)
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
deg_sigma_copy = deg_sigma;

// dd = d / d_p in the field
dd = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(d_p));

// sigma += dd * X_sigma_p; when d == 0 then dd == 0 and sigma is unchanged
for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
sigma[i] ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(dd, X_sigma_p[i]);
}

// 16-bit wrap-around is intended: pp starts at (uint16_t) -1
deg_X = mu - pp;
deg_X_sigma_p = deg_X + deg_sigma_p;

// mask1 = 0xffff if(d != 0) and 0 otherwise
mask1 = -((uint16_t) - d >> 15);

// mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

// mask12 = 0xffff if the deg_sigma increased and 0 otherwise
mask12 = mask1 & mask2;
deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

// Last round: sigma and its degree are final, skip the state update
if (mu == (2 * PARAM_DELTA - 1)) {
break;
}

// If the degree increased, remember this round (mu, d) as the new reference
pp ^= mask12 & (mu ^ pp);
d_p ^= mask12 & (d ^ d_p);
// X_sigma_p <- X * sigma_copy if the degree increased, X * X_sigma_p otherwise
for (i = PARAM_DELTA; i; --i) {
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
}

deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
// Next d: syndromes[mu + 1] plus the sum of sigma[i] * syndromes[mu + 1 - i]
d = syndromes[mu + 1];

for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
d ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]);
}
}

return deg_sigma;
}



/**
* @brief Computes the error polynomial error from the error locator polynomial sigma
*
* See function PQCLEAN_HQCRMRS128_CLEAN_fft for more details.
*
* @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
* @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
* @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
*/
static void compute_roots(uint8_t *error, uint16_t *sigma) {
uint16_t w[1 << PARAM_M] = {0};

PQCLEAN_HQCRMRS128_CLEAN_fft(w, sigma, PARAM_DELTA + 1);
PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(error, w);
}



/**
 * @brief Computes the polynomial z(x)
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 * Coefficients above the degree of sigma are forced to zero with a
 * 0/0xffff mask rather than with a branch on the (secret) degree.
 * Note that the syndromes[0] contribution to z[1] is added unmasked.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    z[0] = 1;

    // z[idx] starts as sigma[idx] for idx <= degree and as 0 above
    for (size_t idx = 1; idx <= PARAM_DELTA; ++idx) {
        const uint16_t in_range = (uint16_t) (0 - ((uint16_t) (idx - degree - 1) >> 15));
        z[idx] = in_range & sigma[idx];
    }

    z[1] ^= syndromes[0];

    for (size_t idx = 2; idx <= PARAM_DELTA; ++idx) {
        // 0xffff when idx <= degree, 0 otherwise
        const uint16_t in_range = (uint16_t) (0 - ((uint16_t) (idx - degree - 1) >> 15));

        // syndromes[idx - 1] + sum_{t=1}^{idx-1} sigma[t] * syndromes[idx - t - 1]
        uint16_t term = syndromes[idx - 1];
        for (size_t t = 1; t < idx; ++t) {
            term ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(sigma[t], syndromes[idx - t - 1]);
        }

        z[idx] ^= in_range & term;
    }
}



/**
* @brief Computes the error values
*
* See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
*
* Works in three passes: (1) collect gf_exp[i] of the (at most PARAM_DELTA) flagged
* positions into beta_j, (2) compute one error value e_j per collected position,
* (3) scatter the e_j back to the flagged positions of error_values.
* Selection is done with 0/0xffff masks so that every pass runs over all indices.
* The masks are built with right shifts of negative signed values, which
* relies on the compiler implementing them as arithmetic shifts.
*
* @param[in,out] error_values Array of PARAM_N1 elements receiving the error values;
*                values are added with +=, so the array is expected to be zero on entry
*                (the caller in this file passes a zero-initialised array)
* @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
* @param[in] error Array whose first PARAM_N1 entries are non-zero at the error positions
*/
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
uint16_t beta_j[PARAM_DELTA] = {0};
uint16_t e_j[PARAM_DELTA] = {0};

uint16_t delta_counter;
uint16_t delta_real_value;
uint16_t found;
uint16_t mask1;
uint16_t mask2;
uint16_t tmp1;
uint16_t tmp2;
uint16_t inverse;
uint16_t inverse_power_j;

// Compute the beta_{j_i} page 31 of the documentation
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; i++) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
// only slot j == delta_counter is written, and only if position i is flagged
beta_j[j] += mask1 & mask2 & gf_exp[i];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
// number of flagged positions that were stored in beta_j
delta_real_value = delta_counter;

// Compute the e_{j_i} page 31 of the documentation
for (size_t i = 0; i < PARAM_DELTA; ++i) {
tmp1 = 1;
tmp2 = 1;
inverse = PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(beta_j[i]);
inverse_power_j = 1;

// tmp1 = 1 + sum_{j=1}^{PARAM_DELTA} z[j] * inverse^j
for (size_t j = 1; j <= PARAM_DELTA; ++j) {
inverse_power_j = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, inverse);
tmp1 ^= PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse_power_j, z[j]);
}
// tmp2 = product over the other slots l != i of (1 + inverse * beta_j[l])
for (size_t k = 1; k < PARAM_DELTA; ++k) {
tmp2 = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
}
mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
// slots beyond the real number of errors are forced to 0
e_j[i] = mask1 & PQCLEAN_HQCRMRS128_CLEAN_gf_mul(tmp1, PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(tmp2));
}

// Place the delta e_{j_i} values at the right coordinates of the output vector
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; ++i) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
error_values[i] += mask1 & mask2 & e_j[j];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
}



/**
 * @brief Correct the errors
 *
 * Adds (XORs) the error value of every position onto the received symbol.
 * Only the low byte of each 16-bit error value ends up in the codeword.
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected in place
 * @param[in] error_values Array of PARAM_N1 elements storing the error values
 */
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    const uint16_t *delta = error_values;

    for (uint8_t *sym = cdw; sym != cdw + PARAM_N1; ++sym, ++delta) {
        *sym ^= *delta;
    }
}



/**
 * @brief Decodes the received word
 *
 * The decoder chains six stages:
 * <ol>
 * <li> computation of the 2*PARAM_DELTA syndromes;
 * <li> computation of the error-locator polynomial sigma;
 * <li> root finding on sigma with the additive FFT, which yields the error positions;
 * <li> computation of the polynomial z(x);
 * <li> computation of the error values;
 * <li> correction of the errors in the received word.
 * </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu Lin and Daniel J. Costello,
 * Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Array of PARAM_K bytes receiving the decoded message
 * @param[in,out] cdw Array of PARAM_N1 bytes storing the received word; it is corrected in place
 */
void PQCLEAN_HQCRMRS128_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    uint16_t synd[2 * PARAM_DELTA] = {0};
    // sigma has degree at most PARAM_DELTA, the FFT wants the padded size
    uint16_t locator[1 << PARAM_FFT] = {0};
    uint8_t error_positions[1 << PARAM_M] = {0};
    uint16_t evaluator[PARAM_N1] = {0};
    uint16_t corrections[PARAM_N1] = {0};

    compute_syndromes(synd, cdw);

    const uint16_t locator_degree = compute_elp(locator, synd);

    compute_roots(error_positions, locator);
    compute_z_poly(evaluator, locator, locator_degree, synd);
    compute_error_values(corrections, evaluator, error_positions);
    correct_errors(cdw, corrections);

    // the message is the systematic part stored behind the parity symbols
    memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);
}

+ 20
- 0
src/kem/hqc/hqc-rmrs-128/clean/reed_solomon.h
Файловите разлики са ограничени, защото са твърде много
Целия файл


+ 176
- 0
src/kem/hqc/hqc-rmrs-128/clean/vector.c Целия файл

@@ -0,0 +1,176 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file vector.c
* @brief Implementation of vectors sampling and some utilities for the HQC scheme
*/


/**
 * @brief Samples the support of a random vector of a given Hamming weight
 *
 * Fills v with <b>weight</b> pairwise distinct positions in [0, PARAM_N - 1].
 * A position is obtained by rejection sampling on 24-bit values. Suppose that
 * PARAM_N is equal to \f$ 70853 \f$; to select a position \f$ r\f$:
 * 1. Take three bytes of seedexpander output as a number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
 * 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$
 * 3. If \f$ x \geq t\f$, go to 1
 * 4. Return \f$ r = x \mod 70853\f$
 *
 * The threshold \f$ t \f$ is precomputed as UTILS_REJECTION_THRESHOLD (see the file parameters.h).
 * A position that is already present in v is drawn again.
 * Random bytes are requested from the seed expander 3 * weight at a time.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Array receiving the <b>weight</b> positions
 * @param[in] weight Integer that is the Hamming weight (expected to be <= PARAM_OMEGA_R)
 */
void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) {
    const size_t pool_len = 3 * weight;
    uint8_t pool[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
    size_t cursor = pool_len;              // "pool exhausted": forces the first refill
    size_t filled = 0;

    while (filled < weight) {
        uint32_t candidate;

        do {
            if (cursor == pool_len) {
                seedexpander(ctx, pool, pool_len);
                cursor = 0;
            }

            // big-endian 24-bit value from the next three pool bytes
            candidate = ((uint32_t) pool[cursor]) << 16;
            candidate |= ((uint32_t) pool[cursor + 1]) << 8;
            candidate |= pool[cursor + 2];
            cursor += 3;
        } while (candidate >= UTILS_REJECTION_THRESHOLD);

        candidate %= PARAM_N;
        v[filled] = candidate;

        // keep the slot only when the position has not been drawn before
        uint8_t is_new = 1;
        for (size_t prev = 0; prev < filled; prev++) {
            if (v[prev] == candidate) {
                is_new = 0;
            }
        }
        filled += is_new;
    }
}



/**
 * @brief Generates a bit vector of a given Hamming weight
 *
 * The <b>weight</b> distinct positions are drawn by
 * PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates (rejection
 * sampling below UTILS_REJECTION_THRESHOLD followed by a reduction modulo PARAM_N)
 * and the corresponding bits are then set in v, 64 positions per word.
 * Bits are OR-ed into v: bits that are already set on entry stay set.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[in,out] v Array of 64-bit words in which the selected bits are set
 * @param[in] weight Integer that is the Hamming weight (expected to be <= PARAM_OMEGA_R)
 */
void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    uint32_t support[PARAM_OMEGA_R] = {0};

    PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(ctx, support, weight);

    for (size_t n = 0; n < weight; ++n) {
        const uint32_t word = support[n] / 64;
        const uint32_t bit = support[n] % 64;
        v[word] |= ((uint64_t) 1) << bit;
    }
}



/**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are drawn from the seed expander, loaded into the
 * VEC_N_SIZE_64 words of v, and the last word is masked with RED_MASK to drop
 * the extra bits.
 *
 * @param[in] ctx Pointer to the context of the seed expander
 * @param[out] v Array of VEC_N_SIZE_64 words receiving the vector
 */
void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    uint64_t *const last_word = &v[VEC_N_SIZE_64 - 1];

    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS128_CLEAN_load8_arr(v, VEC_N_SIZE_64, stream, VEC_N_SIZE_BYTES);

    *last_word &= RED_MASK;
}



/**
 * @brief Adds two vectors over GF(2), i.e. XORs them word by word
 *
 * Words are processed from first to last, so o may be the same array as v1 or v2.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Number of 64-bit words in each vector
 */
void PQCLEAN_HQCRMRS128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t remaining = size;

    while (remaining != 0) {
        *o++ = *v1++ ^ *v2++;
        --remaining;
    }
}



/**
 * @brief Compares two byte vectors without data-dependent branches
 *
 * All <b>size</b> bytes are always visited; the byte-wise differences are
 * OR-ed together and the result is collapsed to a single bit.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Number of bytes to compare
 * @returns 0 if the vectors are equal and 1 otherwise
 */
uint8_t PQCLEAN_HQCRMRS128_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    uint64_t diff = 0;
    const uint8_t *const end = v1 + size;

    while (v1 != end) {
        diff |= (uint64_t) (*v1++ ^ *v2++);
    }

    // diff is below 256, so its negation has the top bit set unless diff == 0
    return (uint8_t) ((0 - diff) >> 63);
}



/**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When growing (size_o >= size_v) the words covering size_v bits are copied
 * and nothing else is written. When shrinking, exactly VEC_N1N2_SIZE_64 words
 * are copied and the unused high bits of word VEC_N1N2_SIZE_64 - 1 are
 * cleared; size_o only determines how many bits of that word are kept, so
 * this branch presumably expects size_o to be PARAM_N1N2 (to be confirmed
 * against the callers).
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
void PQCLEAN_HQCRMRS128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o >= size_v) {
        memcpy(o, v, 8 * CEIL_DIVIDE(size_v, 64));
        return;
    }

    memcpy(o, v, 8 * VEC_N1N2_SIZE_64);

    // number of bits in use in the last word; 0 means the word is full
    const uint32_t used_bits = size_o % 64;
    if (used_bits != 0) {
        o[VEC_N1N2_SIZE_64 - 1] &= (~(uint64_t) 0) >> (64 - used_bits);
    }
}

+ 27
- 0
src/kem/hqc/hqc-rmrs-128/clean/vector.h Целия файл

@@ -0,0 +1,27 @@
#ifndef VECTOR_H
#define VECTOR_H


/**
* @file vector.h
* @brief Header file for vector.c
*/
#include "nistseedexpander.h"
#include "randombytes.h"
#include <stdint.h>

/** Fills v with weight pairwise distinct positions below PARAM_N drawn from the seed expander. */
void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight);

/** Sets weight randomly chosen distinct bits in v; the bits are OR-ed into the existing content of v. */
void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

/** Fills v with seed expander output and masks the last word with RED_MASK. */
void PQCLEAN_HQCRMRS128_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);


/** Writes the word-wise XOR of v1 and v2 (size 64-bit words each) to o. */
void PQCLEAN_HQCRMRS128_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

/** Returns 0 when the first size bytes of v1 and v2 are equal and 1 otherwise. */
uint8_t PQCLEAN_HQCRMRS128_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

/** Copies v (size_v bits) into o and, when size_o < size_v, clears the bits of the last word beyond size_o. */
void PQCLEAN_HQCRMRS128_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


#endif

+ 16
- 0
src/kem/hqc/hqc-rmrs-192/avx2/CMakeLists.txt Целия файл

@@ -0,0 +1,16 @@
# Source files of the AVX2 variant of HQC-RMRS-192 (paths relative to this directory).
set(
SRC_AVX2_HQCRMRS192
code.c
fft.c
gf2x.c
gf.c
hqc.c
kem.c
parsing.c
reed_muller.c
reed_solomon.c
vector.c
)

# NOTE(review): the second argument reads PQCLEAN_HQCRMRS192_CLEAN although the sources in
# this directory define PQCLEAN_HQCRMRS192_AVX2_* symbols. define_kem_alg is not visible
# here -- confirm whether PQCLEAN_HQCRMRS192_AVX2 was intended.
define_kem_alg(hqcrmrs192_avx2
PQCLEAN_HQCRMRS192_CLEAN "${SRC_AVX2_HQCRMRS192}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 25
- 0
src/kem/hqc/hqc-rmrs-192/avx2/api.h Целия файл

@@ -0,0 +1,25 @@
#ifndef PQCLEAN_HQCRMRS192_AVX2_API_H
#define PQCLEAN_HQCRMRS192_AVX2_API_H
/**
* @file api.h
* @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
*/

#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_ALGNAME "HQC-RMRS-192"

// Sizes in bytes of the secret key, public key, shared secret and ciphertext.
#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES 4562
#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_PUBLICKEYBYTES 4522
#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_BYTES 64
#define PQCLEAN_HQCRMRS192_AVX2_CRYPTO_CIPHERTEXTBYTES 9026

// As a technicality, the public key is appended to the secret key in order to respect the NIST API.
// Without this constraint, PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 32
// NOTE(review): 4562 - 4522 = 40, so the value 32 quoted above looks outdated -- confirm.

// Key generation; presumably writes PUBLICKEYBYTES to pk and SECRETKEYBYTES to sk (definition not visible here).
int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

// Encapsulation; presumably writes the ciphertext to ct and the shared secret to ss (definition not visible here).
int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

// Decapsulation; presumably recovers the shared secret ss from ct using sk (definition not visible here).
int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


#endif

+ 47
- 0
src/kem/hqc/hqc-rmrs-192/avx2/code.c Целия файл

@@ -0,0 +1,47 @@
#include "code.h"
#include "parameters.h"
#include "reed_muller.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <string.h>
/**
* @file code.c
* @brief Implementation of concatenated code
*/



/**
 * @brief Encodes the message m to a code word em using the concatenated code
 *
 * The message is first encoded with the Reed-Solomon code (outer code); the
 * resulting word is then encoded with the duplicated Reed-Muller code (inner code).
 *
 * @param[out] em Pointer to an array that is the concatenated code word
 * @param[in] m Pointer to an array that is the message
 */
void PQCLEAN_HQCRMRS192_AVX2_code_encode(uint8_t *em, const uint8_t *m) {
    // intermediate Reed-Solomon code word, 8 bytes per 64-bit word of VEC_N1_SIZE_64
    uint8_t rs_word[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(rs_word, m);
    PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(em, rs_word);
}



/**
 * @brief Decodes the code word em to a message m using the concatenated code
 *
 * The inner Reed-Muller code is decoded first; its output is then handed to
 * the Reed-Solomon decoder, which produces the message.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
void PQCLEAN_HQCRMRS192_AVX2_code_decode(uint8_t *m, const uint8_t *em) {
    // Reed-Muller decoder output, i.e. the (possibly noisy) Reed-Solomon word
    uint8_t rs_word[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(rs_word, em);
    PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(m, rs_word);
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-192/avx2/code.h Целия файл

@@ -0,0 +1,18 @@
#ifndef CODE_H
#define CODE_H


/**
* @file code.h
* Header file of code.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

/** Encodes message with the Reed-Solomon code, then with the Reed-Muller code, into em. */
void PQCLEAN_HQCRMRS192_AVX2_code_encode(uint8_t *em, const uint8_t *message);

/** Decodes em with the Reed-Muller decoder, then with the Reed-Solomon decoder, into m. */
void PQCLEAN_HQCRMRS192_AVX2_code_decode(uint8_t *m, const uint8_t *em);


#endif

+ 351
- 0
src/kem/hqc/hqc-rmrs-192/avx2/fft.c Целия файл

@@ -0,0 +1,351 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
#include <string.h>
/**
* @file fft.c
* Implementation of the additive FFT and its transpose.
* This implementation is based on the paper from Gao and Mateer: <br>
* Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
* IEEE Transactions on Information Theory 56 (2010), 6265--6272.
* http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
* and includes improvements proposed by Bernstein, Chou and Schwabe here:
* https://binary.cr.yp.to/mcbits-20130616.pdf
*/


// File-local helpers. radix and radix_big call each other, and fft_rec calls itself,
// so they are declared up front.
static void compute_fft_betas(uint16_t *betas);
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


/**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The basis elements are the single-bit values 2^(PARAM_M-1), ..., 2^1, stored
 * from the highest bit down to the lowest.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
static void compute_fft_betas(uint16_t *betas) {
    uint16_t *out = betas;

    for (size_t exponent = PARAM_M - 1; exponent > 0; --exponent) {
        *out++ = (uint16_t) (1 << exponent);
    }
}



/**
 * @brief Computes the subset sums of the given set
 *
 * Entry i of subset_sums is the XOR of the set elements selected by the bits
 * of i (bit b of i selects set[b]). The table is built by doubling: once the
 * sums over the first b elements are known, adding set[b] to each of them
 * yields the next 2^b entries.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    size_t span = 1; // number of entries already filled in

    subset_sums[0] = 0;

    for (size_t elem = 0; elem < set_size; ++elem) {
        for (size_t low = 0; low < span; ++low) {
            subset_sums[span + low] = (uint16_t) (set[elem] ^ subset_sums[low]);
        }
        span <<= 1;
    }
}



/**
* @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
*
* Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
* as proposed by Bernstein, Chou and Schwabe:
* https://binary.cr.yp.to/mcbits-20130616.pdf
*
* The cases m_f = 1..4 (2, 4, 8 and 16 coefficients) are fully unrolled;
* any other value is delegated to radix_big.
* Within the unrolled cases the order of the assignments matters: several
* outputs are computed from outputs assigned a few lines earlier.
*
* @param[out] f0 Array half the size of f
* @param[out] f1 Array half the size of f
* @param[in] f Array of size a power of 2
* @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
*/
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
switch (m_f) {
case 4:
// upper halves first: they depend on the inputs f[8..15] only ...
f0[4] = f[8] ^ f[12];
f0[6] = f[12] ^ f[14];
f0[7] = f[14] ^ f[15];
f1[5] = f[11] ^ f[13];
f1[6] = f[13] ^ f[14];
f1[7] = f[15];
// ... except these two, which reuse f1[5] and f0[5] from just above
f0[5] = f[10] ^ f[12] ^ f1[5];
f1[4] = f[9] ^ f[13] ^ f0[5];

// lower halves: each line may use outputs assigned earlier in this case
f0[0] = f[0];
f1[3] = f[7] ^ f[11] ^ f[15];
f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
f1[2] = f[3] ^ f1[1] ^ f0[3];
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

case 3:
f0[0] = f[0];
f0[2] = f[4] ^ f[6];
f0[3] = f[6] ^ f[7];
f1[1] = f[3] ^ f[5] ^ f[7];
f1[2] = f[5] ^ f[6];
f1[3] = f[7];
// the last two reuse f0[2], f1[1] and f0[1] computed above
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

case 2:
f0[0] = f[0];
f0[1] = f[2] ^ f[3];
f1[0] = f[1] ^ f0[1];
f1[1] = f[3];
break;

case 1:
// f = f[0] + f[1].x: both parts are constants
f0[0] = f[0];
f1[0] = f[1];
break;

default:
// generic recursive conversion for larger inputs
radix_big(f0, f1, f, m_f);
break;
}
}

/**
 * @brief Radix conversion for sizes that are not unrolled in radix()
 *
 * With n = 2^(m_f - 2), f is viewed as four quarters of n coefficients each.
 * Two polynomials of 2n coefficients are formed from them:
 * Q = (quarter2 + quarter3, quarter3) and R = (quarter0, quarter1 + quarter2 + quarter3).
 * Both are converted with radix() at size m_f - 1 and the results are
 * concatenated: the lower halves of f0/f1 come from R, the upper halves from Q.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of 2^m_f coefficients
 * @param[in] m_f log2 of the number of coefficients of f
 */
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R1[1 << (PARAM_FFT - 2)] = {0};

    const size_t n = (size_t) 1 << (m_f - 2);
    const size_t quarter_bytes = n * sizeof(uint16_t);

    // R starts as the lower half of f, the upper half of Q as the top quarter
    memcpy(R, f, 2 * quarter_bytes);
    memcpy(Q + n, f + 3 * n, quarter_bytes);

    for (size_t c = 0; c < n; ++c) {
        Q[c] = (uint16_t) (f[3 * n + c] ^ f[2 * n + c]);
        R[n + c] ^= Q[c];
    }

    radix(Q0, Q1, Q, m_f - 1);
    radix(R0, R1, R, m_f - 1);

    memcpy(f0, R0, quarter_bytes);
    memcpy(f0 + n, Q0, quarter_bytes);
    memcpy(f1, R1, quarter_bytes);
    memcpy(f1 + n, Q1, quarter_bytes);
}



/**
* @brief Evaluates f at all subset sums of a given set
*
* This function is a subroutine of the function PQCLEAN_HQCRMRS192_AVX2_fft.
* It writes 2^m evaluations to w and calls itself with m - 1 and m_f - 1
* until m_f reaches 1.
* Note that f is modified in place by the twisting of Step 2.
*
* @param[out] w Array receiving the 2^m evaluations
* @param[in,out] f Array of 2^m_f coefficients
* @param[in] f_coeffs Number of coefficients of f
* @param[in] m Number of betas
* @param[in] m_f log2 of the (padded) number of coefficients of f
* @param[in] betas FFT constants
*/
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
uint16_t gammas[PARAM_M - 2] = {0};
uint16_t deltas[PARAM_M - 2] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
uint16_t u[1 << (PARAM_M - 2)] = {0};
uint16_t v[1 << (PARAM_M - 2)] = {0};
uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

uint16_t beta_m_pow;
size_t i, j, k;
size_t x;

// Step 1
// f has two coefficients: evaluate f[0] + f[1].s directly at every subset sum s,
// building the table by doubling
if (m_f == 1) {
for (i = 0; i < m; ++i) {
tmp[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], f[1]);
}

w[0] = f[0];
x = 1;
for (j = 0; j < m; ++j) {
for (k = 0; k < x; ++k) {
w[x + k] = w[k] ^ tmp[j];
}
x <<= 1;
}

return;
}

// Step 2: compute g
// twist: coefficient i of f is multiplied by betas[m - 1]^i (skipped when that beta is 1)
if (betas[m - 1] != 1) {
beta_m_pow = 1;
x = 1;
x <<= m_f;
for (i = 1; i < x; ++i) {
beta_m_pow = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, betas[m - 1]);
f[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, f[i]);
}
}

// Step 3
radix(f0, f1, f, m_f);

// Step 4: compute gammas and deltas
// gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
for (i = 0; i + 1 < m; ++i) {
gammas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS192_AVX2_gf_inverse(betas[m - 1]));
deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(gammas[i]) ^ gammas[i];
}

// Compute gammas sums
compute_subset_sums(gammas_sums, gammas, m - 1);

// Step 5
fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

k = 1;
k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
// no second recursion needed: f1[0] stands in for every v[i]
w[0] = u[0];
w[k] = u[0] ^ f1[0];
for (i = 1; i < k; ++i) {
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], f1[0]);
w[k + i] = w[i] ^ f1[0];
}
} else {
fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

// Step 6
// w[i] = u[i] + gammas_sums[i].v[i] and w[k + i] = w[i] + v[i]
memcpy(w + k, v, 2 * k);
w[0] = u[0];
w[k] ^= u[0];
for (i = 1; i < k; ++i) {
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], v[i]);
w[k + i] ^= w[i];
}
}
}



/**
 * @brief Evaluates f on all fields elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * The two halves f0 and f1 produced by the radix conversion are local copies;
 * they are the arrays handed to (and twisted by) fft_rec.
 *
 * @param[out] w Array of 2^PARAM_M elements receiving the evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1)
 */
void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t betas[PARAM_M - 1] = {0};
    uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t f0[1 << (PARAM_FFT - 1)] = {0};
    uint16_t f1[1 << (PARAM_FFT - 1)] = {0};
    uint16_t deltas[PARAM_M - 1] = {0};
    uint16_t u[1 << (PARAM_M - 1)] = {0};
    uint16_t v[1 << (PARAM_M - 1)] = {0};
    const size_t half = (size_t) 1 << (PARAM_M - 1);

    // Gao and Mateer, Step 1 (PARAM_FFT > 1) and Step 2 (beta_m = 1) need no work here
    compute_fft_betas(betas);
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    // Step 3: f(x) = f0(x^2 - x) + x.f1(x^2 - x)
    radix(f0, f1, f, PARAM_FFT);

    // Step 4: deltas[i] = betas[i]^2 + betas[i]
    for (size_t i = 0; i < PARAM_M - 1; ++i) {
        deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(betas[i]) ^ betas[i];
    }

    // Step 5: evaluate both halves on the subset sums of the deltas
    fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    // Steps 6 and 7: w[i] = u[i] + betas_sums[i].v[i] and w[half + i] = w[i] + v[i]
    // index 0 is the evaluation at 0, index half the evaluation at 1
    w[0] = u[0];
    w[half] = (uint16_t) (v[0] ^ u[0]);

    for (size_t i = 1; i < half; ++i) {
        const uint16_t low = (uint16_t) (u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas_sums[i], v[i]));
        w[i] = low;
        w[half + i] = (uint16_t) (v[i] ^ low);
    }
}



/**
 * @brief Retrieves the error polynomial error from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * Every evaluation equal to zero toggles one entry of error. The entry is
 * PARAM_GF_MUL_ORDER minus the discrete logarithm of the evaluation point,
 * except for the evaluations at 0 (w[0]) and at 1 (w[k]), which both map to error[0].
 * Entries are XOR-ed, so error is expected to be zero on entry.
 *
 * @param[in,out] error Array with the error
 * @param[in] w Array of size 2^PARAM_M
 */
void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t gammas[PARAM_M - 1] = {0};
    uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0};
    const uint16_t k = 1 << (PARAM_M - 1);

    compute_fft_betas(gammas);
    compute_subset_sums(gammas_sums, gammas, PARAM_M - 1);

    // (uint16_t)(-x) >> 15 is 1 for a non-zero field element x and 0 for x == 0,
    // so "1 ^ ..." is the branch-free test x == 0
    error[0] ^= 1 ^ ((uint16_t) (0 - w[0]) >> 15);
    error[0] ^= 1 ^ ((uint16_t) (0 - w[k]) >> 15);

    for (size_t i = 1; i < k; ++i) {
        const uint16_t point = gammas_sums[i];
        const size_t low_index = PARAM_GF_MUL_ORDER - gf_log[point];
        const size_t high_index = PARAM_GF_MUL_ORDER - gf_log[point ^ 1];

        error[low_index] ^= 1 ^ ((uint16_t) (0 - w[i]) >> 15);
        error[high_index] ^= 1 ^ ((uint16_t) (0 - w[k + i]) >> 15);
    }
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-192/avx2/fft.h Целия файл

@@ -0,0 +1,18 @@
#ifndef FFT_H
#define FFT_H


/**
* @file fft.h
* Header file of fft.c
*/

#include <stddef.h>
#include <stdint.h>

// Evaluates the polynomial f (f_coeffs coefficients) and stores the evaluations in w; defined in fft.c.
// NOTE(review): the required sizes of w and f are not stated in this header - confirm against fft.c.
void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

// Toggles bit 0 of error[...] for every zero entry of w (w has 2^PARAM_M entries); defined in fft.c.
void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


#endif

+ 176
- 0
src/kem/hqc/hqc-rmrs-192/avx2/gf.c Целия файл

@@ -0,0 +1,176 @@
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
/**
* @file gf.c
* Galois field implementation with multiplication using the pclmulqdq instruction
*/


static uint16_t gf_reduce(uint64_t x, size_t deg_x);



/**
 * Reduces polynomial x modulo primitive polynomial GF_POLY.
 *
 * Each pass folds the bits at positions >= PARAM_M back onto the low PARAM_M bits:
 * once for the constant term of GF_POLY and once for each of its remaining low-order
 * terms, whose positions are found with a trailing-zero count.
 *
 * @returns x mod GF_POLY
 * @param[in] x Polynomial of degree less than 64
 * @param[in] deg_x The degree of polynomial x
 */
static uint16_t gf_reduce(uint64_t x, size_t deg_x) {
    // Number of folding passes needed to bring the degree below PARAM_M
    const size_t passes = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2);

    for (size_t pass = 0; pass < passes; ++pass) {
        // Split x into the overflowing part and the part that already fits
        uint64_t overflow = x >> PARAM_M;
        x &= (1 << PARAM_M) - 1;

        // Constant term of GF_POLY
        x ^= overflow;

        // Remaining terms, visited from the lowest set bit upwards
        uint16_t pending = PARAM_GF_POLY ^ 1;
        uint16_t prev_bit = 0;
        size_t terms_left = PARAM_GF_POLY_WT - 2;
        while (terms_left != 0) {
            const uint16_t bit = __tzcnt_u16(pending);
            overflow <<= (uint16_t) (bit - prev_bit);
            x ^= overflow;
            pending ^= 1 << bit;
            prev_bit = bit;
            --terms_left;
        }
    }

    return x;
}



/**
 * Multiplies two elements of GF(2^GF_M).
 *
 * The carry-less product is obtained with pclmulqdq and then reduced by gf_reduce.
 *
 * @returns the product a*b
 * @param[in] a Element of GF(2^GF_M)
 * @param[in] b Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mul(uint16_t a, uint16_t b) {
    const __m128i product = _mm_clmulepi64_si128(_mm_cvtsi32_si128(a), _mm_cvtsi32_si128(b), 0);
    const uint32_t unreduced = _mm_cvtsi128_si32(product);

    return gf_reduce(unreduced, 2 * (PARAM_M - 1));
}



/**
* Compute 16 products in GF(2^GF_M).
*
* The products are formed with carry-less multiplications on masked 64-bit words and
* then reduced lane by lane with the red[] table; only the low byte of each 16-bit
* lane survives the final mask.
* NOTE(review): presumably every input lane must already be a reduced field element
* (value below 2^8) - confirm against callers.
*
* @returns the product (a0b0,a1b1,...,a15b15) , ai,bi in GF(2^GF_M)
* @param[in] a 256-bit register where a0,..,a15 are stored as 16 bit integers
* @param[in] b 256-bit register where b0,..,b15 are stored as 16 bit integer
*
*/
__m256i PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(__m256i a, __m256i b) {
// Split both operands into their low and high 128-bit halves (8 lanes each)
__m128i al = _mm256_extractf128_si256(a, 0);
__m128i ah = _mm256_extractf128_si256(a, 1);
__m128i bl = _mm256_extractf128_si256(b, 0);
__m128i bh = _mm256_extractf128_si256(b, 1);

// Low 64-bit words of al/bl (selector 0x0): multiply the even lanes and the odd lanes
// separately, then keep only the 16-bit slots selected by the MIDDLEMASK constants
__m128i abl0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x0);
abl0 &= CONST128_MIDDLEMASKL;
abl0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

// Same for the high 64-bit words of al/bl (selector 0x11)
__m128i abh0 = _mm_clmulepi64_si128(al & CONST128_MASKL, bl & CONST128_MASKL, 0x11);
abh0 &= CONST128_MIDDLEMASKL;
abh0 ^= (_mm_clmulepi64_si128(al & CONST128_MASKH, bl & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

// Gather the kept 16-bit slots: abl0 into the low 8 bytes, abh0 into the high 8 bytes
abl0 = _mm_shuffle_epi8(abl0, CONST128_INDEXL);
abl0 ^= _mm_shuffle_epi8(abh0, CONST128_INDEXH);

// Repeat the whole procedure for the upper 128-bit halves ah/bh
__m128i abl1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x0);
abl1 &= CONST128_MIDDLEMASKL;
abl1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x0) & CONST128_MIDDLEMASKH);

__m128i abh1 = _mm_clmulepi64_si128(ah & CONST128_MASKL, bh & CONST128_MASKL, 0x11);
abh1 &= CONST128_MIDDLEMASKL;
abh1 ^= (_mm_clmulepi64_si128(ah & CONST128_MASKH, bh & CONST128_MASKH, 0x11) & CONST128_MIDDLEMASKH);

abl1 = _mm_shuffle_epi8(abl1, CONST128_INDEXL);
abl1 ^= _mm_shuffle_epi8(abh1, CONST128_INDEXH);

// abl1 becomes the upper 128 bits, abl0 the lower 128 bits
__m256i ret = _mm256_set_m128i(abl1, abl0);

// aux starts with bit 8 set in every 16-bit lane
__m256i aux = CONST256_MR0;

// For bit 8+i (i = 0..6): lanes where that bit is set get red[i] XORed in
for (int32_t i = 0; i < 7; i++) {
ret ^= red[i] & _mm256_cmpeq_epi16((ret & aux), aux);
aux = aux << 1;
}

// Drop everything above bit 7 in each lane
ret &= CONST256_LASTMASK;
return ret;
}



/**
 * Squares an element of GF(2^GF_M).
 *
 * Squaring over GF(2) moves bit i of a to bit 2*i; the spread value is then
 * reduced by gf_reduce.
 *
 * @returns a^2
 * @param[in] a Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_square(uint16_t a) {
    uint32_t spread = a & 1;

    for (size_t bit = 1; bit < PARAM_M; ++bit) {
        // Bit `bit` of a lands on position 2 * bit
        spread ^= ((uint32_t) a << bit) & (1 << 2 * bit);
    }

    return gf_reduce(spread, 2 * (PARAM_M - 1));
}



/**
 * Computes the inverse of an element of GF(2^8) as a^254,
 * using the addition chain 1 2 3 4 7 11 15 30 60 120 127 254
 * @returns the inverse of a
 * @param[in] a Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_inverse(uint16_t a) {
    uint16_t acc = PQCLEAN_HQCRMRS192_AVX2_gf_square(a);              /* a^2 */
    const uint16_t a3 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(acc, a);       /* a^3 */

    acc = PQCLEAN_HQCRMRS192_AVX2_gf_square(acc);                     /* a^4 */
    const uint16_t a7 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(acc, a3);      /* a^7 */
    const uint16_t a11 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(acc, a7);     /* a^11 */
    acc = PQCLEAN_HQCRMRS192_AVX2_gf_mul(a11, acc);                   /* a^15 */

    /* three squarings: a^30, a^60, a^120 */
    for (size_t round = 0; round < 3; ++round) {
        acc = PQCLEAN_HQCRMRS192_AVX2_gf_square(acc);
    }

    acc = PQCLEAN_HQCRMRS192_AVX2_gf_mul(acc, a7);                    /* a^127 */
    return PQCLEAN_HQCRMRS192_AVX2_gf_square(acc);                    /* a^254 */
}



/**
 * Returns i modulo 2^GF_M-1.
 * i must be less than 2*(2^GF_M-1).
 * Therefore, the return value is either i or i-2^GF_M+1.
 * The selection is done with a mask instead of a branch.
 * @returns i mod (2^GF_M-1)
 * @param[in] i The integer whose modulo is taken
 */
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mod(uint16_t i) {
    const uint16_t shifted = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // borrow = 1 when the subtraction wrapped, i.e. when i < GF_MUL_ORDER
    const uint16_t borrow = shifted >> 15;

    // add_back = GF_MUL_ORDER when the subtraction wrapped, 0 otherwise
    const uint16_t add_back = (uint16_t) (-borrow) & PARAM_GF_MUL_ORDER;

    return shifted + add_back;
}

+ 69
- 0
src/kem/hqc/hqc-rmrs-192/avx2/gf.h Целия файл

@@ -0,0 +1,69 @@
#ifndef GF_H
#define GF_H


/**
* @file gf.h
* Header file of gf.c
*/

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

// Builds a 256-bit vector from two 128-bit halves: v1 becomes the low half, v0 the high half.
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)

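/**
* Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
* The table has 258 entries: the last three repeat the first three (1, 2, 4).
* The last two elements were documented as needed by the PQCLEAN_HQCRMRS192_AVX2_gf_mul function
* (for example if both elements to multiply are zero).
* NOTE(review): in this AVX2 build gf_mul is implemented with pclmulqdq and does not read this table.
*/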
static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



/**
* Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
* The logarithm of 0 is set to 0 by convention.
*/
static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };

/**
* Masks needed for the computation of 16 mult in GF(2^M)
*
* CONST256_MR0          bit 8 of every 16-bit lane (first bit probed by the reduction loop of gf_mul_vect)
* CONST256_LASTMASK     low byte of every 16-bit lane
* CONST128_MASKL        even 16-bit lanes of each 64-bit word
* CONST128_MASKH        odd 16-bit lanes of each 64-bit word
* CONST128_MIDDLEMASKL  bits 0..15 of each 64-bit word
* CONST128_MIDDLEMASKH  bits 32..47 of each 64-bit word
* CONST128_INDEXH       byte-shuffle control: bytes 0,1,4,5,8,9,12,13 go to the high 8 bytes, low 8 bytes are zeroed
* CONST128_INDEXL       byte-shuffle control: bytes 0,1,4,5,8,9,12,13 go to the low 8 bytes, high 8 bytes are zeroed
*/
#define CONST256_MR0 _mm256_set1_epi64x((long long) 0x0100010001000100)
#define CONST256_LASTMASK _mm256_set1_epi64x((long long) 0x00ff00ff00ff00ff)
#define CONST128_MASKL _mm_set1_epi64x((long long) 0x0000ffff0000ffff)
#define CONST128_MASKH _mm_set1_epi64x((long long) 0xffff0000ffff0000)
#define CONST128_MIDDLEMASKL _mm_set1_epi64x((long long) 0x000000000000ffff)
#define CONST128_MIDDLEMASKH _mm_set1_epi64x((long long) 0x0000ffff00000000)
#define CONST128_INDEXH _mm_set_epi64x((long long) 0x0d0c090805040100, (long long) 0xffffffffffffffff)
#define CONST128_INDEXL _mm_set_epi64x((long long) 0xffffffffffffffff, (long long) 0x0d0c090805040100)

/**
* Reduction constants used by gf_mul_vect: red[i] is x^(8+i) modulo x^8+x^4+x^3+x^2+1
* for i = 0..6, replicated into every 16-bit lane of a 256-bit register
* (four lanes per 64-bit initializer, four initializers per entry).
*/
static const __m256i red[7] = {
{0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL, 0x001d001d001d001dUL},
{0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL, 0x003a003a003a003aUL},
{0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL, 0x0074007400740074UL},
{0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL, 0x00e800e800e800e8UL},
{0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL, 0x00cd00cd00cd00cdUL},
{0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL, 0x0087008700870087UL},
{0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL, 0x0013001300130013UL},

};


// Product a*b of two field elements (carry-less multiply followed by reduction).
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mul(uint16_t a, uint16_t b);

// Lane-wise product of sixteen field elements stored as 16-bit integers.
__m256i PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(__m256i a, __m256i b);

// Square a^2 of a field element.
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_square(uint16_t a);

// Inverse of a field element, computed as a^254.
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_inverse(uint16_t a);

// i mod (2^GF_M - 1) for i < 2*(2^GF_M - 1).
uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_mod(uint16_t i);


#endif

+ 408
- 0
src/kem/hqc/hqc-rmrs-192/avx2/gf2x.c Целия файл

@@ -0,0 +1,408 @@
#include "gf2x.h"
#include "parameters.h"
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
* \file gf2x.c
* \brief AVX2 implementation of multiplication of two polynomials
*/



// Number of 256-bit words in one ninth of a PARAM_N-bit polynomial (rounded up).
// karat_three_way_mult feeds blocks of this size to karat_mult_16.
#define VEC_N_SPLIT_3x3 CEIL_DIVIDE(PARAM_N/9, 256)
// Number of 256-bit words in one third of the polynomial: three blocks of VEC_N_SPLIT_3x3 words.
#define VEC_N_SPLIT_3 (3*VEC_N_SPLIT_3x3)

static inline void reduce(uint64_t *o, const __m256i *a);
static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B);
static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B);
static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B);


/**
* @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
*
* This function computes the modular reduction of the polynomial a(x):
* the bits of a(x) from position PARAM_N upwards are shifted down by PARAM_N
* positions and XORed onto the low part.
*
* @param[out] o Pointer to the result
* @param[in] a256 Pointer to the polynomial a(x)
*/
static inline void reduce(uint64_t *o, const __m256i *a256) {
size_t i, i2;
__m256i r256, carry256;
__m256i *o256 = (__m256i *)o;
const uint64_t *a64 = (const uint64_t *)a256;
uint64_t r, carry;

// Vector part: four 64-bit words per iteration. i indexes the high part of a
// (starting at word PARAM_N / 64), i2 the 256-bit word of the result.
i2 = 0;
for (i = (PARAM_N >> 6); i < (PARAM_N >> 5) - 4; i += 4) {
// Bits of word i above the PARAM_N boundary, moved down to bit 0
r256 = _mm256_lddqu_si256((const __m256i *) (& a64[i]));
r256 = _mm256_srli_epi64(r256, PARAM_N & 63);
// Matching low bits of the following word, moved up to fill the top of r256
carry256 = _mm256_lddqu_si256((const __m256i *) (& a64[i + 1]));
carry256 = _mm256_slli_epi64(carry256, (-PARAM_N) & 63);
r256 ^= carry256;
_mm256_storeu_si256(&o256[i2], a256[i2] ^ r256);
i2 += 1;
}

// Scalar tail: convert i to a low-word index and finish one 64-bit word at a time,
// up to and including word PARAM_N / 64
i = i - (PARAM_N >> 6);
for (; i < (PARAM_N >> 6) + 1; i++) {
r = a64[i + (PARAM_N >> 6)] >> (PARAM_N & 63);
carry = a64[i + (PARAM_N >> 6) + 1] << ((-PARAM_N) & 63);
r ^= carry;
o[i] = a64[i] ^ r;
}

// RED_MASK is defined elsewhere; presumably it clears the bits at or above position PARAM_N in the last word
o[PARAM_N >> 6] &= RED_MASK;
}



/**
* @brief Compute C(x) = A(x)*B(x)
* A(x) and B(x) are each read as two 128-bit words (A[0], A[1] and B[0], B[1])
* This function computes A(x)*B(x) using Karatsuba; each 128x128-bit product
* is itself built from three 64x64-bit carry-less multiplications.
*
* @param[out] C Pointer to the result (four 128-bit words are written)
* @param[in] A Pointer to the polynomial A(x)
* @param[in] B Pointer to the polynomial B(x)
*/
static inline void karat_mult_1(__m128i *C, const __m128i *A, const __m128i *B) {
__m128i D1[2];
__m128i D0[2], D2[2];
__m128i Al = _mm_loadu_si128(A);
__m128i Ah = _mm_loadu_si128(A + 1);
__m128i Bl = _mm_loadu_si128(B);
__m128i Bh = _mm_loadu_si128(B + 1);

// Compute Al.Bl=D0
// DD0 = low64 * low64, DD2 = high64 * high64
__m128i DD0 = _mm_clmulepi64_si128(Al, Bl, 0);
__m128i DD2 = _mm_clmulepi64_si128(Al, Bl, 0x11);
// Shuffle 0x4e swaps the two 64-bit halves, so the low half of the XOR is low64 ^ high64
__m128i AAlpAAh = _mm_xor_si128(Al, _mm_shuffle_epi32(Al, 0x4e));
__m128i BBlpBBh = _mm_xor_si128(Bl, _mm_shuffle_epi32(Bl, 0x4e));
// Karatsuba middle term
__m128i DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
// Add the middle term at a 64-bit offset: its low half goes to the top of D0[0],
// its high half to the bottom of D0[1]
D0[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
D0[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

// Compute Ah.Bh=D2
DD0 = _mm_clmulepi64_si128(Ah, Bh, 0);
DD2 = _mm_clmulepi64_si128(Ah, Bh, 0x11);
AAlpAAh = _mm_xor_si128(Ah, _mm_shuffle_epi32(Ah, 0x4e));
BBlpBBh = _mm_xor_si128(Bh, _mm_shuffle_epi32(Bh, 0x4e));
DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
D2[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
D2[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

// Compute AlpAh.BlpBh=D1
// Initialisation of AlpAh and BlpBh
__m128i AlpAh = _mm_xor_si128(Al, Ah);
__m128i BlpBh = _mm_xor_si128(Bl, Bh);
DD0 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0);
DD2 = _mm_clmulepi64_si128(AlpAh, BlpBh, 0x11);
AAlpAAh = _mm_xor_si128(AlpAh, _mm_shuffle_epi32(AlpAh, 0x4e));
BBlpBBh = _mm_xor_si128(BlpBh, _mm_shuffle_epi32(BlpBh, 0x4e));
DD1 = _mm_xor_si128(_mm_xor_si128(DD0, DD2), _mm_clmulepi64_si128(AAlpAAh, BBlpBBh, 0));
D1[0] = _mm_xor_si128(DD0, _mm_unpacklo_epi64(_mm_setzero_si128(), DD1));
D1[1] = _mm_xor_si128(DD2, _mm_unpackhi_epi64(DD1, _mm_setzero_si128()));

// Final computation of C: outer-level Karatsuba recombination of D0, D1 and D2
__m128i middle = _mm_xor_si128(D0[1], D2[0]);
C[0] = D0[0];
C[1] = middle ^ D0[0] ^ D1[0];
C[2] = middle ^ D1[1] ^ D2[1];
C[3] = D2[1];
}



/**
 * @brief Compute C(x) = A(x)*B(x) for operands of two 256-bit words
 *
 * One Karatsuba level on top of karat_mult_1: the low words, the high words and
 * the XOR of both are multiplied, then the three partial products are recombined.
 *
 * @param[out] C Pointer to the result (four 256-bit words are written)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
static inline void karat_mult_2(__m256i *C, const __m256i *A, const __m256i *B) {
    __m256i low[2], high[2], mixed[2];
    __m256i a_sum = _mm256_xor_si256(A[0], A[1]);
    __m256i b_sum = _mm256_xor_si256(B[0], B[1]);

    // karat_mult_1 works on 128-bit words: two of them per 256-bit word
    karat_mult_1((__m128i *) low, (const __m128i *) A, (const __m128i *) B);
    karat_mult_1((__m128i *) high, (const __m128i *) (A + 1), (const __m128i *) (B + 1));
    karat_mult_1((__m128i *) mixed, (__m128i *) &a_sum, (__m128i *) &b_sum);

    const __m256i overlap = _mm256_xor_si256(low[1], high[0]);

    C[0] = low[0];
    C[1] = _mm256_xor_si256(overlap, _mm256_xor_si256(low[0], mixed[0]));
    C[2] = _mm256_xor_si256(overlap, _mm256_xor_si256(mixed[1], high[1]));
    C[3] = high[1];
}


/**
 * @brief Compute C(x) = A(x)*B(x) for operands of four 256-bit words
 *
 * One Karatsuba level on top of karat_mult_2.
 *
 * @param[out] C Pointer to the result (eight 256-bit words are written)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
static inline void karat_mult_4(__m256i *C, const __m256i *A, const __m256i *B) {
    enum { HALF = 2 };
    __m256i low[2 * HALF], high[2 * HALF], mixed[2 * HALF];
    __m256i a_sum[HALF], b_sum[HALF];

    // Operand halves XORed together for the middle product
    for (size_t n = 0; n < HALF; n++) {
        a_sum[n] = _mm256_xor_si256(A[n], A[n + HALF]);
        b_sum[n] = _mm256_xor_si256(B[n], B[n + HALF]);
    }

    karat_mult_2(low, A, B);
    karat_mult_2(high, A + HALF, B + HALF);
    karat_mult_2(mixed, a_sum, b_sum);

    // Recombine the three partial products
    for (size_t n = 0; n < HALF; n++) {
        const __m256i overlap = _mm256_xor_si256(low[n + HALF], high[n]);

        C[n] = low[n];
        C[n + HALF] = _mm256_xor_si256(overlap, _mm256_xor_si256(low[n], mixed[n]));
        C[n + 2 * HALF] = _mm256_xor_si256(overlap, _mm256_xor_si256(mixed[n + HALF], high[n + HALF]));
        C[n + 3 * HALF] = high[n + HALF];
    }
}



/**
 * @brief Compute C(x) = A(x)*B(x) for operands of eight 256-bit words
 *
 * One Karatsuba level on top of karat_mult_4.
 *
 * @param[out] C Pointer to the result (sixteen 256-bit words are written)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
static inline void karat_mult_8(__m256i *C, const __m256i *A, const __m256i *B) {
    enum { HALF = 4 };
    __m256i low[2 * HALF], high[2 * HALF], mixed[2 * HALF];
    __m256i a_sum[HALF], b_sum[HALF];

    // Operand halves XORed together for the middle product
    for (size_t n = 0; n < HALF; n++) {
        a_sum[n] = _mm256_xor_si256(A[n], A[n + HALF]);
        b_sum[n] = _mm256_xor_si256(B[n], B[n + HALF]);
    }

    karat_mult_4(low, A, B);
    karat_mult_4(high, A + HALF, B + HALF);
    karat_mult_4(mixed, a_sum, b_sum);

    // Recombine the three partial products
    for (size_t n = 0; n < HALF; n++) {
        const __m256i overlap = _mm256_xor_si256(low[n + HALF], high[n]);

        C[n] = low[n];
        C[n + HALF] = _mm256_xor_si256(overlap, _mm256_xor_si256(low[n], mixed[n]));
        C[n + 2 * HALF] = _mm256_xor_si256(overlap, _mm256_xor_si256(mixed[n + HALF], high[n + HALF]));
        C[n + 3 * HALF] = high[n + HALF];
    }
}



/**
 * @brief Compute C(x) = A(x)*B(x) for operands of sixteen 256-bit words
 *
 * One Karatsuba level on top of karat_mult_8.
 *
 * @param[out] C Pointer to the result (thirty-two 256-bit words are written)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
static inline void karat_mult_16(__m256i *C, const __m256i *A, const __m256i *B) {
    enum { HALF = 8 };
    __m256i low[2 * HALF], high[2 * HALF], mixed[2 * HALF];
    __m256i a_sum[HALF], b_sum[HALF];

    // Operand halves XORed together for the middle product
    for (size_t n = 0; n < HALF; n++) {
        a_sum[n] = _mm256_xor_si256(A[n], A[n + HALF]);
        b_sum[n] = _mm256_xor_si256(B[n], B[n + HALF]);
    }

    karat_mult_8(low, A, B);
    karat_mult_8(high, A + HALF, B + HALF);
    karat_mult_8(mixed, a_sum, b_sum);

    // Recombine the three partial products
    for (size_t n = 0; n < HALF; n++) {
        const __m256i overlap = _mm256_xor_si256(low[n + HALF], high[n]);

        C[n] = low[n];
        C[n + HALF] = _mm256_xor_si256(overlap, _mm256_xor_si256(low[n], mixed[n]));
        C[n + 2 * HALF] = _mm256_xor_si256(overlap, _mm256_xor_si256(mixed[n + HALF], high[n + HALF]));
        C[n + 3 * HALF] = high[n + HALF];
    }
}


/**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * This function computes A(x)*B(x) using Karatsuba 3 part split: each operand is
 * cut into three blocks of VEC_N_SPLIT_3x3 256-bit words, six block products are
 * computed with karat_mult_16 and then recombined.
 * The result is assembled in a local buffer and copied to C at the end.
 *
 * @param[out] C Pointer to the result (2 * VEC_N_SPLIT_3 words are written)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
static inline void karat_three_way_mult(__m256i *C, const __m256i *A, const __m256i *B) {
    const __m256i *const a_blk0 = A;
    const __m256i *const a_blk1 = A + VEC_N_SPLIT_3x3;
    const __m256i *const a_blk2 = A + 2 * VEC_N_SPLIT_3x3;
    const __m256i *const b_blk0 = B;
    const __m256i *const b_blk1 = B + VEC_N_SPLIT_3x3;
    const __m256i *const b_blk2 = B + 2 * VEC_N_SPLIT_3x3;

    // Pairwise block sums used for the mixed products
    __m256i a_sum01[VEC_N_SPLIT_3x3], a_sum02[VEC_N_SPLIT_3x3], a_sum12[VEC_N_SPLIT_3x3];
    __m256i b_sum01[VEC_N_SPLIT_3x3], b_sum02[VEC_N_SPLIT_3x3], b_sum12[VEC_N_SPLIT_3x3];

    // Block products: pXY = (aX + aY) * (bX + bY), pXX = aX * bX
    __m256i p00[2 * VEC_N_SPLIT_3x3], p11[2 * VEC_N_SPLIT_3x3], p22[2 * VEC_N_SPLIT_3x3];
    __m256i p01[2 * VEC_N_SPLIT_3x3], p02[2 * VEC_N_SPLIT_3x3], p12[2 * VEC_N_SPLIT_3x3];

    __m256i staged[6 * VEC_N_SPLIT_3x3];

    for (size_t n = 0; n < VEC_N_SPLIT_3x3; n++) {
        a_sum01[n] = _mm256_xor_si256(a_blk0[n], a_blk1[n]);
        b_sum01[n] = _mm256_xor_si256(b_blk0[n], b_blk1[n]);

        a_sum12[n] = _mm256_xor_si256(a_blk2[n], a_blk1[n]);
        b_sum12[n] = _mm256_xor_si256(b_blk2[n], b_blk1[n]);

        a_sum02[n] = _mm256_xor_si256(a_blk0[n], a_blk2[n]);
        b_sum02[n] = _mm256_xor_si256(b_blk0[n], b_blk2[n]);
    }

    karat_mult_16(p00, a_blk0, b_blk0);
    karat_mult_16(p11, a_blk1, b_blk1);
    karat_mult_16(p22, a_blk2, b_blk2);

    karat_mult_16(p01, a_sum01, b_sum01);
    karat_mult_16(p02, a_sum02, b_sum02);
    karat_mult_16(p12, a_sum12, b_sum12);

    // Recombination: n addresses the lower half of a product, up its upper half
    for (size_t n = 0; n < VEC_N_SPLIT_3x3; n++) {
        const size_t up = n + VEC_N_SPLIT_3x3;
        const __m256i low_mix = p00[n] ^ p11[n] ^ p00[up];
        const __m256i high_mix = p11[up] ^ p22[n] ^ p22[up];

        staged[n] = p00[n];
        staged[up] = p01[n] ^ low_mix;
        staged[n + 2 * VEC_N_SPLIT_3x3] = p02[n] ^ p22[n] ^ p01[up] ^ p11[up] ^ low_mix;
        staged[n + 3 * VEC_N_SPLIT_3x3] = p12[n] ^ p02[up] ^ p00[up] ^ p11[n] ^ high_mix;
        staged[n + 4 * VEC_N_SPLIT_3x3] = p12[up] ^ high_mix;
        staged[n + 5 * VEC_N_SPLIT_3x3] = p22[up];
    }

    // sizeof staged covers 6 * VEC_N_SPLIT_3x3 == 2 * VEC_N_SPLIT_3 words
    memcpy(C, staged, sizeof staged);
}



/**
 * @brief Compute C(x) = A(x)*B(x)
 *
 * Outer level of the multiplication: each operand is cut into three blocks of
 * VEC_N_SPLIT_3 256-bit words, six block products are computed with
 * karat_three_way_mult and recombined directly into C.
 *
 * @param[out] C Pointer to the result (6 * VEC_N_SPLIT_3 words are written)
 * @param[in] A Pointer to the polynomial A(x)
 * @param[in] B Pointer to the polynomial B(x)
 */
static inline void karat_mult9(__m256i *C, const aligned_vec_t *A, const aligned_vec_t *B) {
    const __m256i *const a_blk0 = (const __m256i *) (A->arr64);
    const __m256i *const a_blk1 = a_blk0 + VEC_N_SPLIT_3;
    const __m256i *const a_blk2 = a_blk0 + 2 * VEC_N_SPLIT_3;
    const __m256i *const b_blk0 = (const __m256i *) (B->arr64);
    const __m256i *const b_blk1 = b_blk0 + VEC_N_SPLIT_3;
    const __m256i *const b_blk2 = b_blk0 + 2 * VEC_N_SPLIT_3;

    // Pairwise block sums used for the mixed products
    __m256i a_sum01[VEC_N_SPLIT_3], a_sum02[VEC_N_SPLIT_3], a_sum12[VEC_N_SPLIT_3];
    __m256i b_sum01[VEC_N_SPLIT_3], b_sum02[VEC_N_SPLIT_3], b_sum12[VEC_N_SPLIT_3];

    // Block products: pXY = (aX + aY) * (bX + bY), pXX = aX * bX
    __m256i p00[2 * VEC_N_SPLIT_3], p11[2 * VEC_N_SPLIT_3], p22[2 * VEC_N_SPLIT_3];
    __m256i p01[2 * VEC_N_SPLIT_3], p02[2 * VEC_N_SPLIT_3], p12[2 * VEC_N_SPLIT_3];

    for (size_t n = 0; n < VEC_N_SPLIT_3; n++) {
        a_sum01[n] = _mm256_xor_si256(a_blk0[n], a_blk1[n]);
        b_sum01[n] = _mm256_xor_si256(b_blk0[n], b_blk1[n]);

        a_sum12[n] = _mm256_xor_si256(a_blk2[n], a_blk1[n]);
        b_sum12[n] = _mm256_xor_si256(b_blk2[n], b_blk1[n]);

        a_sum02[n] = _mm256_xor_si256(a_blk0[n], a_blk2[n]);
        b_sum02[n] = _mm256_xor_si256(b_blk0[n], b_blk2[n]);
    }

    karat_three_way_mult(p00, a_blk0, b_blk0);
    karat_three_way_mult(p11, a_blk1, b_blk1);
    karat_three_way_mult(p22, a_blk2, b_blk2);

    karat_three_way_mult(p01, a_sum01, b_sum01);
    karat_three_way_mult(p02, a_sum02, b_sum02);
    karat_three_way_mult(p12, a_sum12, b_sum12);

    // Recombination: n addresses the lower half of a product, up its upper half
    for (size_t n = 0; n < VEC_N_SPLIT_3; n++) {
        const size_t up = n + VEC_N_SPLIT_3;
        const __m256i low_mix = p00[n] ^ p11[n] ^ p00[up];
        const __m256i high_mix = p11[up] ^ p22[n] ^ p22[up];

        C[n] = p00[n];
        C[up] = p01[n] ^ low_mix;
        C[n + 2 * VEC_N_SPLIT_3] = p02[n] ^ p22[n] ^ p01[up] ^ p11[up] ^ low_mix;
        C[n + 3 * VEC_N_SPLIT_3] = p12[n] ^ p02[up] ^ p00[up] ^ p11[n] ^ high_mix;
        C[n + 4 * VEC_N_SPLIT_3] = p12[up] ^ high_mix;
        C[n + 5 * VEC_N_SPLIT_3] = p22[up];
    }
}



/**
* @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
*
* This function multiplies the polynomials <b>a1</b> and <b>a2</b> with karat_mult9
* and reduces the product modulo \f$ X^n - 1\f$ with reduce.
* Both operands are passed as aligned_vec_t so their storage is aligned for __m256i.
*
* @param[out] o Pointer to the result
* @param[in] a1 Pointer to a polynomial
* @param[in] a2 Pointer to a polynomial
*/
void PQCLEAN_HQCRMRS192_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2) {
// Unreduced product buffer, zero-initialised
__m256i a1_times_a2[2 * PARAM_N_MULT + 1] = {0};
karat_mult9(a1_times_a2, a1, a2);
reduce(o, a1_times_a2);
}

+ 21
- 0
src/kem/hqc/hqc-rmrs-192/avx2/gf2x.h Целия файл

@@ -0,0 +1,21 @@
#ifndef GF2X_H
#define GF2X_H


/**
* @file gf2x.h
* @brief Header file for gf2x.c
*/
#include "parameters.h"
#include <immintrin.h>
#include <stdint.h>

// Polynomial storage of VEC_N_256_SIZE_64 64-bit words.
// The __m256i member is never used for data: it only gives the union the alignment
// of __m256i, since the multiplication code reads arr64 through __m256i pointers.
typedef union {
uint64_t arr64[VEC_N_256_SIZE_64];
__m256i dummy;
} aligned_vec_t;

void PQCLEAN_HQCRMRS192_AVX2_vect_mul(uint64_t *o, const aligned_vec_t *a1, const aligned_vec_t *a2);


#endif

+ 168
- 0
src/kem/hqc/hqc-rmrs-192/avx2/hqc.c Целия файл

@@ -0,0 +1,168 @@
#include "code.h"
#include "gf2x.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
/**
* @file hqc.c
* @brief Implementation of hqc.h
*/



/**
 * @brief Keygen of the HQC_PKE IND_CPA scheme
 *
 * The public key is composed of the syndrome <b>s</b> = x + y.h as well as the <b>seed</b>
 * used to generate the vector <b>h</b>.
 *
 * The secret key is composed of the <b>seed</b> used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 *
 * @param[out] pk String containing the public key
 * @param[out] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
    AES_XOF_struct sk_expander;
    AES_XOF_struct pk_expander;
    uint8_t seed_sk[SEED_BYTES] = {0};
    uint8_t seed_pk[SEED_BYTES] = {0};
    aligned_vec_t x = {0};
    aligned_vec_t y = {0};
    aligned_vec_t h = {0};
    aligned_vec_t s = {0};
    aligned_vec_t y_times_h = {0};

    // One seed expander per key, each fed with fresh random bytes
    randombytes(seed_sk, SEED_BYTES);
    seedexpander_init(&sk_expander, seed_sk, seed_sk + 32, SEEDEXPANDER_MAX_LENGTH);

    randombytes(seed_pk, SEED_BYTES);
    seedexpander_init(&pk_expander, seed_pk, seed_pk + 32, SEEDEXPANDER_MAX_LENGTH);

    // Secret vectors x and y of weight PARAM_OMEGA
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&sk_expander, x.arr64, PARAM_OMEGA);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&sk_expander, y.arr64, PARAM_OMEGA);

    // Public part: h from the public seed, s = x + y.h
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random(&pk_expander, h.arr64);
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(y_times_h.arr64, &y, &h);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(s.arr64, x.arr64, y_times_h.arr64, VEC_N_256_SIZE_64);

    // Serialise both keys
    PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(pk, seed_pk, s.arr64);
    PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(sk, seed_sk, pk);
}



/**
 * @brief Encryption of the HQC_PKE IND_CPA scheme
 *
 * The ciphertext is composed of vectors <b>u</b> = r1 + r2.h and <b>v</b> = m.G + s.r2 + e.
 * The buffer behind v is also used as byte storage for the encoded message before
 * it is converted to 64-bit words in place.
 *
 * @param[out] u Vector u (first part of the ciphertext)
 * @param[out] v Vector v (second part of the ciphertext)
 * @param[in] m Vector representing the message to encrypt
 * @param[in] theta Seed used to derive randomness required for encryption
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
    AES_XOF_struct theta_expander;
    aligned_vec_t h = {0};
    aligned_vec_t s = {0};
    aligned_vec_t r1 = {0};
    aligned_vec_t r2 = {0};
    aligned_vec_t e = {0};
    aligned_vec_t scratch_a = {0};
    aligned_vec_t scratch_b = {0};
    aligned_vec_t scratch_c = {0};
    uint8_t *const v_bytes = (uint8_t *) v;

    // Randomness for r1, r2 and e is derived from theta
    seedexpander_init(&theta_expander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

    // Retrieve h and s from public key
    PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(h.arr64, s.arr64, pk);

    // Generate r1, r2 and e (the order of these draws fixes the result)
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&theta_expander, r1.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&theta_expander, r2.arr64, PARAM_OMEGA_R);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&theta_expander, e.arr64, PARAM_OMEGA_E);

    // u = r1 + r2.h
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(scratch_a.arr64, &r2, &h);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(u, r1.arr64, scratch_a.arr64, VEC_N_256_SIZE_64);

    // Encode the message into v, then widen it to PARAM_N bits in scratch_a
    PQCLEAN_HQCRMRS192_AVX2_code_encode(v_bytes, m);
    PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N1N2_256_SIZE_64, v_bytes, VEC_N1N2_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_vect_resize(scratch_a.arr64, PARAM_N, v, PARAM_N1N2);

    // v = m.G + s.r2 + e, truncated back to PARAM_N1N2 bits
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(scratch_b.arr64, &r2, &s);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(scratch_c.arr64, e.arr64, scratch_b.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_vect_add(scratch_b.arr64, scratch_a.arr64, scratch_c.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_vect_resize(v, PARAM_N1N2, scratch_b.arr64, PARAM_N);
}



/**
 * @brief Decryption of the HQC_PKE IND_CPA scheme
 *
 * Computes v - u.y and hands the result, serialised to bytes, to the decoder.
 *
 * @param[out] m Vector representing the decrypted message
 * @param[in] u Vector u (first part of the ciphertext)
 * @param[in] v Vector v (second part of the ciphertext)
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
    uint8_t pk[PUBLIC_KEY_BYTES] = {0};
    aligned_vec_t x = {0};
    aligned_vec_t y = {0};
    aligned_vec_t scratch_a = {0};
    aligned_vec_t scratch_b = {0};
    aligned_vec_t scratch_c = {0};

    // Retrieve x, y, pk from secret key
    PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(x.arr64, y.arr64, pk, sk);

    // Widen v to PARAM_N bits
    PQCLEAN_HQCRMRS192_AVX2_vect_resize(scratch_a.arr64, PARAM_N, v, PARAM_N1N2);

    // vect_mul needs aligned_vec_t operands, so u is copied into one first
    memcpy(scratch_b.arr64, u, VEC_N_256_SIZE_64 * sizeof (uint64_t));
    PQCLEAN_HQCRMRS192_AVX2_vect_mul(scratch_c.arr64, &y, &scratch_b);

    // scratch_b = v - u.y
    PQCLEAN_HQCRMRS192_AVX2_vect_add(scratch_b.arr64, scratch_a.arr64, scratch_c.arr64, VEC_N_256_SIZE_64);

    // Compute m by decoding v - u.y (scratch_a is reused as the byte buffer)
    PQCLEAN_HQCRMRS192_AVX2_store8_arr((uint8_t *) scratch_a.arr64, VEC_N_SIZE_BYTES, scratch_b.arr64, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_code_decode(m, (uint8_t *) scratch_a.arr64);
}

+ 19
- 0
src/kem/hqc/hqc-rmrs-192/avx2/hqc.h Целия файл

@@ -0,0 +1,19 @@
#ifndef HQC_H
#define HQC_H


/**
* @file hqc.h
* @brief Functions of the HQC_PKE IND_CPA scheme
*/

#include <stdint.h>

// Generates a PKE key pair; the public key is also embedded in sk.
void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

// Encrypts m under pk into (u, v), deriving all randomness from theta.
void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

// Decrypts (u, v) with sk and writes the decoded message to m.
void PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


#endif

+ 140
- 0
src/kem/hqc/hqc-rmrs-192/avx2/kem.c Целия файл

@@ -0,0 +1,140 @@
#include "api.h"
#include "fips202.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "sha2.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file kem.c
* @brief Implementation of api.h
*/



/**
* @brief Keygen of the HQC_KEM IND-CCA2 scheme
*
* The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
*
* The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
* As a technicality, the public key is appended to the secret key in order to respect NIST API.
*
* @param[out] pk String containing the public key
* @param[out] sk String containing the secret key
* @returns 0 if keygen is successful (this implementation always returns 0)
*/
int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

// The KEM key pair is exactly the PKE key pair
PQCLEAN_HQCRMRS192_AVX2_hqc_pke_keygen(pk, sk);
return 0;
}



/**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is drawn, theta = SHA3-512(m) seeds the encryption of m,
 * d = SHA-512(m) is appended to the ciphertext and the shared secret is
 * SHA-512(m || u || v).
 *
 * All working buffers have automatic storage, so the function keeps no state
 * between calls and can be used from several threads at once (provided the
 * primitives it calls can).
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 if encapsulation is successful (this implementation always returns 0)
 */
int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {

    uint8_t theta[SHA512_BYTES] = {0};
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    // Not static: a shared buffer here would make concurrent encapsulations race on u
    uint64_t u[VEC_N_256_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};

    // Computing m
    randombytes(m, VEC_K_SIZE_BYTES);

    // Computing theta
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m
    PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(u, v, m, theta, pk);

    // Computing d
    sha512(d, m, VEC_K_SIZE_BYTES);

    // Computing shared secret: SHA-512 over m || u || v
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Computing ciphertext
    PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(ct, u, v, d);


    return 0;
}



/**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted, the recovered message is re-encrypted, and the
 * result is compared with the received ciphertext. The shared secret is computed
 * in every case and cleared when the comparison fails.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
int PQCLEAN_HQCRMRS192_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {

    // Retrieving u, v and d from ciphertext
    uint64_t ct_u[VEC_N_256_SIZE_64] = {0};
    uint64_t ct_v[VEC_N1N2_256_SIZE_64] = {0};
    unsigned char ct_d[SHA512_BYTES] = {0};
    PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(ct_u, ct_v, ct_d, ct);

    // Retrieving pk from sk
    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    uint8_t m[VEC_K_SIZE_BYTES] = {0};
    PQCLEAN_HQCRMRS192_AVX2_hqc_pke_decrypt(m, ct_u, ct_v, sk);

    // Computing theta
    uint8_t theta[SHA512_BYTES] = {0};
    sha3_512(theta, m, VEC_K_SIZE_BYTES);

    // Encrypting m'
    uint64_t check_u[VEC_N_256_SIZE_64] = {0};
    uint64_t check_v[VEC_N1N2_256_SIZE_64] = {0};
    PQCLEAN_HQCRMRS192_AVX2_hqc_pke_encrypt(check_u, check_v, m, theta, pk);

    // Computing d'
    unsigned char check_d[SHA512_BYTES] = {0};
    sha512(check_d, m, VEC_K_SIZE_BYTES);

    // Computing shared secret: SHA-512 over m || u || v
    unsigned char mc[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    memcpy(mc, m, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES, VEC_N_SIZE_BYTES, ct_u, VEC_N_256_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(mc + VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES, VEC_N1N2_SIZE_BYTES, ct_v, VEC_N1N2_SIZE_64);
    sha512(ss, mc, VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES);

    // Abort if c != c' or d != d'
    uint8_t mismatch = PQCLEAN_HQCRMRS192_AVX2_vect_compare((uint8_t *)ct_u, (uint8_t *)check_u, VEC_N_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS192_AVX2_vect_compare((uint8_t *)ct_v, (uint8_t *)check_v, VEC_N1N2_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS192_AVX2_vect_compare(ct_d, check_d, SHA512_BYTES);

    // Stretch "any bit set" to an all-ones byte without branching
    mismatch = (uint8_t) (-((int16_t) mismatch) >> 15);

    // Clear the shared secret on failure, leave it untouched on success
    for (size_t pos = 0; pos < SHARED_SECRET_BYTES; pos++) {
        ss[pos] &= ~mismatch;
    }

    return -(mismatch & 1);
}

+ 109
- 0
src/kem/hqc/hqc-rmrs-192/avx2/parameters.h Целия файл

@@ -0,0 +1,109 @@
#ifndef HQC_PARAMETERS_H
#define HQC_PARAMETERS_H


/**
* @file parameters.h
* @brief Parameters of the HQC_KEM IND-CCA2 scheme
*/
#include "api.h"


#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and round the result up */

/*
#define PARAM_N Define the parameter n of the scheme
#define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code)
#define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
#define PARAM_N1N2 Define the length in bits of the Concatenated code
#define PARAM_OMEGA Define the parameter omega of the scheme
#define PARAM_OMEGA_E Define the parameter omega_e of the scheme
#define PARAM_OMEGA_R Define the parameter omega_r of the scheme
#define PARAM_SECURITY Define the security level corresponding to the chosen parameters
#define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters

#define SECRET_KEY_BYTES Define the size of the secret key in bytes
#define PUBLIC_KEY_BYTES Define the size of the public key in bytes
#define SHARED_SECRET_BYTES Define the size of the shared secret in bytes
#define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes

#define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
#define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes
#define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes
#define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes
#define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

#define VEC_N_SIZE_64 Define the size of the array used to store a PARAM_N sized vector in 64 bits
#define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits
#define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
#define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

#define PARAM_N_MULT PARAM_N rounded up to the next multiple of 9*256 (used to size VEC_N_256_SIZE_64)
#define VEC_N_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N considered as elements of 256 bits
#define VEC_N1N2_256_SIZE_64 Define the size of the array of 64 bits elements used to store an array of size PARAM_N1N2 considered as elements of 256 bits

#define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
#define PARAM_M Define a positive integer
#define PARAM_GF_POLY Generator polynomial of galois field GF(2^PARAM_M), represented in hexadecimal form
#define PARAM_GF_POLY_WT Hamming weight of PARAM_GF_POLY
#define PARAM_GF_POLY_M2 Distance between the primitive polynomial first two set bits
#define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1
#define PARAM_K Define the size of the information bits of the Reed-Solomon code
#define PARAM_G Define the size of the generator polynomial of Reed-Solomon code
#define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input
We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=16
The smallest power of 2 greater than 16+1 is 32=2^5
#define RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code

#define RED_MASK A mask for the higher bits of a vector
#define SHA512_BYTES Define the size of SHA512 output in bytes
#define SEED_BYTES Define the size of the seed in bytes
#define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length
*/

#define PARAM_N 35851
#define PARAM_N1 56
#define PARAM_N2 640
#define PARAM_N1N2 35840
#define PARAM_OMEGA 100
#define PARAM_OMEGA_E 114
#define PARAM_OMEGA_R 114
#define PARAM_SECURITY 192
#define PARAM_DFR_EXP 192

#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_SECRETKEYBYTES
#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_PUBLICKEYBYTES
#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_BYTES
#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS192_AVX2_CRYPTO_CIPHERTEXTBYTES

#define UTILS_REJECTION_THRESHOLD 16742417
#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8)
#define VEC_K_SIZE_BYTES PARAM_K
#define VEC_N1_SIZE_BYTES PARAM_N1
#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8)

#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64)
#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8)
#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8)
#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64)

#define PARAM_N_MULT (9*256*CEIL_DIVIDE(CEIL_DIVIDE(PARAM_N, 9), 256))
#define VEC_N_256_SIZE_64 (PARAM_N_MULT / 64)
#define VEC_N1N2_256_SIZE_64 (CEIL_DIVIDE(PARAM_N1N2, 256) << 2)

#define PARAM_DELTA 16
#define PARAM_M 8
#define PARAM_GF_POLY 0x11D
#define PARAM_GF_POLY_WT 5
#define PARAM_GF_POLY_M2 4
#define PARAM_GF_MUL_ORDER 255
#define PARAM_K 24
#define PARAM_G 33
#define PARAM_FFT 5
#define RS_POLY_COEFS 45,216,239,24,253,104,27,40,107,50,163,210,227,134,224,158,119,13,158,1,238,164,82,43,15,232,246,142,50,189,29,232,1

// PARAM_N mod 64 = 11, so RED_MASK keeps the 11 valid bits of the last 64-bit word
#define RED_MASK 0x7ff
#define SHA512_BYTES 64
#define SEED_BYTES 40
#define SEEDEXPANDER_MAX_LENGTH 4294967295

#endif

+ 186
- 0
src/kem/hqc/hqc-rmrs-192/avx2/parsing.c Целия файл

@@ -0,0 +1,186 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file parsing.c
* @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
*/


/**
 * @brief Serialize a 64-bit word into 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer of at least 8 bytes
 * @param[in] in Value to serialize
 */
void PQCLEAN_HQCRMRS192_AVX2_store8(unsigned char *out, uint64_t in) {
    for (unsigned int k = 0; k < 8; k++) {
        // peel off the current low byte, then move the next one into place
        out[k] = (unsigned char) (in & 0xFF);
        in >>= 8;
    }
}


/**
 * @brief Read 8 bytes as a 64-bit word, least significant byte first
 *
 * @param[in] in Source buffer of at least 8 bytes
 * @returns The 64-bit value whose byte k (counting from the low end) is in[k]
 */
uint64_t PQCLEAN_HQCRMRS192_AVX2_load8(const unsigned char *in) {
    uint64_t value = 0;

    // walk from the most significant byte down so each shift makes room for the next one
    for (unsigned int k = 8; k-- > 0;) {
        value = (value << 8) | in[k];
    }

    return value;
}

/**
 * @brief Convert a byte string into an array of 64-bit words (little-endian)
 *
 * Complete groups of 8 bytes are converted with load8. If bytes remain and
 * there is still room in the output, the 1 to 7 trailing bytes are packed
 * into the low end of one more word. Output words beyond that are untouched.
 *
 * @param[out] out64 Destination array of 64-bit words
 * @param[in] outlen Capacity of out64, in words
 * @param[in] in8 Source byte string
 * @param[in] inlen Length of in8, in bytes
 */
void PQCLEAN_HQCRMRS192_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t words = 0;
    size_t consumed = 0;

    // whole words first
    for (; words < outlen && inlen - consumed >= 8; words++, consumed += 8) {
        out64[words] = PQCLEAN_HQCRMRS192_AVX2_load8(in8 + consumed);
    }

    // nothing left to read, or nowhere left to write
    if (consumed >= inlen || words >= outlen) {
        return;
    }

    // fold the remaining (fewer than 8) bytes, most significant first
    uint64_t tail = 0;
    for (size_t k = inlen; k > consumed; k--) {
        tail = (tail << 8) | in8[k - 1];
    }
    out64[words] = tail;
}

/**
 * @brief Convert an array of 64-bit words into a byte string (little-endian)
 *
 * Writing stops as soon as either outlen bytes have been produced or all
 * inlen input words have been consumed, whichever happens first.
 *
 * @param[out] out8 Destination byte string
 * @param[in] outlen Capacity of out8, in bytes
 * @param[in] in64 Source array of 64-bit words
 * @param[in] inlen Length of in64, in words
 */
void PQCLEAN_HQCRMRS192_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    // byte `pos` of the output is byte (pos mod 8) of word (pos / 8)
    for (size_t pos = 0; pos < outlen && (pos >> 3) < inlen; pos++) {
        out8[pos] = (uint8_t) (in64[pos >> 3] >> (8 * (pos & 7)));
    }
}


/**
 * @brief Serialize a secret key
 *
 * The output is the SEED_BYTES-byte secret seed followed by the
 * PUBLIC_KEY_BYTES-byte public key (the public key is appended in order to
 * respect the NIST API).
 *
 * @param[out] sk String receiving the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *const pk_part = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_part, pk, PUBLIC_KEY_BYTES);
}

/**
 * @brief Deserialize a secret key
 *
 * The string holds the secret seed followed by the public key. The public
 * key is copied out as-is, and the vectors <b>x</b> and <b>y</b> are
 * regenerated from the seed, each with weight PARAM_OMEGA.
 *
 * @param[out] x uint64_t representation of vector x
 * @param[out] y uint64_t representation of vector y
 * @param[out] pk String receiving the public key
 * @param[in] sk String containing the secret key
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk) {
    AES_XOF_struct expander;
    uint8_t seed[SEED_BYTES] = {0};
    const uint8_t *pk_part = sk + SEED_BYTES;

    memcpy(seed, sk, SEED_BYTES);
    memcpy(pk, pk_part, PUBLIC_KEY_BYTES);

    // the bytes from offset 32 onwards are handed over as the expander's second input
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    // x must be drawn before y: both come from the same expander stream
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&expander, x, PARAM_OMEGA);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(&expander, y, PARAM_OMEGA);
}

/**
 * @brief Serialize a public key
 *
 * The output is the seed used to generate the vector <b>h</b>, followed by
 * the syndrome <b>s</b> stored on VEC_N_SIZE_BYTES bytes.
 *
 * @param[out] pk String receiving the public key
 * @param[in] pk_seed Seed used to generate the public key
 * @param[in] s uint64_t representation of vector s
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
    uint8_t *const s_part = pk + SEED_BYTES;

    memcpy(pk, pk_seed, SEED_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(s_part, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
}



/**
 * @brief Deserialize a public key
 *
 * The string holds the seed used to generate the vector <b>h</b>, followed
 * by the syndrome <b>s</b>. The syndrome is unpacked into 64-bit words and
 * <b>h</b> is regenerated from the seed.
 *
 * @param[out] h uint64_t representation of vector h
 * @param[out] s uint64_t representation of vector s
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
    AES_XOF_struct expander;
    uint8_t seed[SEED_BYTES] = {0};
    const uint8_t *s_part = pk + SEED_BYTES;

    memcpy(seed, pk, SEED_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_load8_arr(s, VEC_N_SIZE_64, s_part, VEC_N_SIZE_BYTES);

    // the bytes from offset 32 onwards are handed over as the expander's second input
    seedexpander_init(&expander, seed, seed + 32, SEEDEXPANDER_MAX_LENGTH);
    PQCLEAN_HQCRMRS192_AVX2_vect_set_random(&expander, h);
}


/**
 * @brief Serialize a ciphertext
 *
 * The output is the concatenation of <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] ct String receiving the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *const v_part = ct + VEC_N_SIZE_BYTES;
    uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS192_AVX2_store8_arr(ct, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_AVX2_store8_arr(v_part, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_part, d, SHA512_BYTES);
}


/**
 * @brief Deserialize a ciphertext
 *
 * The string is the concatenation of <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and the hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *const v_part = ct + VEC_N_SIZE_BYTES;
    const uint8_t *const d_part = v_part + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS192_AVX2_load8_arr(u, VEC_N_SIZE_64, ct, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N1N2_SIZE_64, v_part, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_part, SHA512_BYTES);
}

+ 36
- 0
src/kem/hqc/hqc-rmrs-192/avx2/parsing.h Целия файл

@@ -0,0 +1,36 @@
#ifndef PARSING_H
#define PARSING_H


/**
 * @file parsing.h
 * @brief Header file for parsing.c
 */

/* <stddef.h> provides size_t, which the array helpers below use in their
 * prototypes; <stdint.h> alone is not guaranteed to declare it. */
#include <stddef.h>
#include <stdint.h>

/* Write a 64-bit word as 8 bytes, least significant byte first. */
void PQCLEAN_HQCRMRS192_AVX2_store8(unsigned char *out, uint64_t in);

/* Read 8 bytes, least significant byte first, as a 64-bit word. */
uint64_t PQCLEAN_HQCRMRS192_AVX2_load8(const unsigned char *in);

/* Convert inlen bytes into at most outlen 64-bit words. */
void PQCLEAN_HQCRMRS192_AVX2_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

/* Convert at most inlen 64-bit words into at most outlen bytes. */
void PQCLEAN_HQCRMRS192_AVX2_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


/* Secret key <-> string (seed followed by the public key). */
void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

void PQCLEAN_HQCRMRS192_AVX2_hqc_secret_key_from_string(uint64_t *x, uint64_t *y, uint8_t *pk, const uint8_t *sk);


/* Public key <-> string (seed followed by the syndrome s). */
void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

void PQCLEAN_HQCRMRS192_AVX2_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


/* Ciphertext <-> string (u, then v, then the hash d). */
void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

void PQCLEAN_HQCRMRS192_AVX2_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


#endif

+ 389
- 0
src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.c Целия файл

@@ -0,0 +1,389 @@
#include "parameters.h"
#include "reed_muller.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
/**
* @file reed_muller.c
* Constant time implementation of Reed-Muller code RM(1,7)
*/


// number of repeated code words: each message byte is encoded as MULTIPLICITY
// copies of one 128-bit RM(1,7) codeword (PARAM_N2 bits in total, rounded up)
#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128)

// copy bit 0 of x into all bits of a 64 bit value (0 or -1), without branching
#define BIT0MASK(x) (int64_t)(-((x) & 1))

// file-local helpers; the definitions further down inherit this internal linkage
static void encode(uint8_t *word, uint8_t message);
static void expand_and_sum(__m256i *dst, const uint64_t *src);
static void hadamard(__m256i *src, __m256i *dst);
static uint32_t find_peaks(__m256i *transform);



/**
 * @brief Encode a single byte into a single codeword using RM(1,7)
 *
 * The 128-bit codeword is produced as four 32-bit quarters, each written
 * least significant byte first. Generator rows, one per message bit
 * (in this picture the low bits are at the left):
 *   0  aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 *   1  cccccccc cccccccc cccccccc cccccccc
 *   2  f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 *   3  ff00ff00 ff00ff00 ff00ff00 ff00ff00
 *   4  ffff0000 ffff0000 ffff0000 ffff0000
 *   5  00000000 ffffffff 00000000 ffffffff
 *   6  00000000 00000000 ffffffff ffffffff
 *   7  ffffffff ffffffff ffffffff ffffffff
 *
 * Every message bit is turned into an all-zero / all-one mask, so the
 * function contains no data-dependent branch.
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message to encode
 */
static void encode(uint8_t *word, uint8_t message) {
    // rows 0..4 look the same in each of the four quarters
    static const uint32_t inner_rows[5] = {
        0xaaaaaaaa, 0xcccccccc, 0xf0f0f0f0, 0xff00ff00, 0xffff0000
    };
    uint32_t quarter[4];
    uint32_t base;

    // bit 7 complements the whole codeword, so start from it
    base = BIT0MASK(message >> 7);
    for (size_t row = 0; row < 5; row++) {
        base ^= BIT0MASK(message >> row) & inner_rows[row];
    }

    // bit 5 complements quarters 1 and 3, bit 6 complements quarters 2 and 3
    quarter[0] = base;
    quarter[1] = quarter[0] ^ (uint32_t) BIT0MASK(message >> 5);
    quarter[3] = quarter[1] ^ (uint32_t) BIT0MASK(message >> 6);
    quarter[2] = quarter[3] ^ (uint32_t) BIT0MASK(message >> 5);

    for (size_t q = 0; q < 4; q++) {
        for (size_t b = 0; b < 4; b++) {
            word[4 * q + b] = (quarter[q] >> (8 * b)) & 0xff;
        }
    }
}



/**
 * @brief Add multiple codewords into expanded codeword
 *
 * Each of the MULTIPLICITY 128-bit codewords found in src is expanded to one
 * 16-bit counter per bit, and the counters of all copies are summed: after
 * the call, 16-bit lane `bit` of dst[part] holds how many copies have bit
 * number 16 * part + bit set.
 *
 * Note: this does not write the codewords as -1 or +1 as the green machine does
 * instead, just 0 and 1 is used.
 * The resulting hadamard transform has:
 * all values are halved
 * the first entry is 64 too high
 *
 * The 16-bit slices are extracted from the 64-bit words with shifts rather
 * than through a uint16_t pointer cast (which breaks the strict-aliasing
 * rule), and the accumulation uses _mm256_add_epi16 rather than the compiler
 * specific `+=` on __m256i. On the little-endian targets that provide AVX2
 * both forms read the same bits.
 *
 * @param[out] dst Structure that contain the expanded codeword (8 vectors of 16 lanes)
 * @param[in] src Structure that contain the codeword (2 * MULTIPLICITY 64-bit words)
 */
inline void expand_and_sum(__m256i *dst, const uint64_t *src) {
    uint16_t v[16];

    for (size_t part = 0; part < 8; part++) {
        dst[part] = _mm256_setzero_si256();
    }
    for (size_t copy = 0; copy < MULTIPLICITY; copy++) {
        for (size_t part = 0; part < 8; part++) {
            // 16-bit slice number `part` of this copy: slices 0..3 live in the
            // first 64-bit word, slices 4..7 in the second one
            const uint16_t chunk = (uint16_t) (src[2 * copy + (part >> 2)] >> (16 * (part & 3)));
            for (size_t bit = 0; bit < 16; bit++) {
                v[bit] = (chunk >> bit) & 1;
            }
            // _mm256_set_epi16 takes its arguments from the highest lane down to the lowest
            dst[part] = _mm256_add_epi16(dst[part],
                                         _mm256_set_epi16(v[15], v[14], v[13], v[12], v[11], v[10], v[9], v[8],
                                                          v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]));
        }
    }
}



/**
 * @brief Hadamard transform
 *
 * Computes the Hadamard transform of the 128 16-bit entries held in src and
 * leaves the result in dst. src is used as a scratch buffer and is
 * overwritten.
 *
 * The method is easiest to see on H(3) instead of H(7). Multiplying the row
 * vector [a b c d e f g h] by
 *   [1  1  1  1  1  1  1  1]
 *   [1 -1  1 -1  1 -1  1 -1]
 *   [1  1 -1 -1  1  1 -1 -1]
 *   [1 -1 -1  1  1 -1 -1  1]
 *   [1  1  1  1 -1 -1 -1 -1]
 *   [1 -1  1 -1 -1  1 -1  1]
 *   [1  1 -1 -1 -1 -1  1  1]
 *   [1 -1 -1  1 -1  1  1 -1]
 * can be done in three passes, each of which stores the sums of adjacent
 * pairs in the lower half of the buffer and their differences in the upper half:
 *   input:  a, b, c, d, e, f, g, h
 *   pass 1: a+b, c+d, e+f, g+h, a-b, c-d, e-f, g-h
 *   pass 2: a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 *   pass 3: a+b+c+d+e+f+g+h, a-b+c-d+e-f+g-h, a+b-c-d+e+f-g-h, a-b-c+d+e-f-g+h,
 *           a+b+c+d-e-f-g-h, a-b+c-d-e+f-g+h, a+b-c-d-e-f+g+h, a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * Likewise, this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src Structure that contain the expanded codeword; clobbered
 * @param[out] dst Structure that receives the transform
 */
inline void hadamard(__m256i *src, __m256i *dst) {
    // the passes ping-pong between the two buffers; with an odd number of
    // passes (7) the last one writes into dst
    __m256i *bufs[2] = {src, dst};

    for (size_t pass = 0; pass < 7; pass++) {
        __m256i *in = bufs[pass & 1];
        __m256i *out = bufs[(pass & 1) ^ 1];

        for (size_t pair = 0; pair < 4; pair++) {
            // hadd/hsub work within 128-bit lanes, so the two middle 64-bit
            // blocks of each result have to be swapped (permute control 0xd8)
            out[pair] = _mm256_permute4x64_epi64(
                            _mm256_hadd_epi16(in[2 * pair], in[2 * pair + 1]), 0xd8);
            out[pair + 4] = _mm256_permute4x64_epi64(
                                _mm256_hsub_epi16(in[2 * pair], in[2 * pair + 1]), 0xd8);
        }
    }
}



/**
* @brief Finding the location of the highest value
*
* This is the final step of the green machine: find the location of the highest value,
* and add 128 if the peak is positive
* Notes on decoding
* The standard "Green machine" decoder works as follows:
* if the received codeword is W, compute (2 * W - 1) * H7
* The entries of the resulting vector are always even and vary from
* -128 (= the complement is a code word, add bit 7 to decode)
* via 0 (this is a different codeword)
* to 128 (this is the code word).
*
* Our decoding differs in two ways:
* - We take W instead of 2 * W - 1 (so the entries are 0,1 instead of -1,1)
* - We take the sum of the repetitions (so the entries are 0..MULTIPLICITY)
* This implies that we have to subtract 64M (M=MULTIPLICITY)
* from the first entry to make sure the first codeword is handled properly
* and that the entries vary from -64M to 64M.
* -64M or 64M stands for a perfect codeword.
* If there are fewer than 32M errors, there is always a unique codeword
* which has an entry with absolute value > 32M;
* this is because an error changes an entry by 1.
* The highest number that seems to be decodable is 50 errors, so that the
* highest entries in the hadamard transform can be as low as 12.
* But this is different for the repeated code.
* Because multiple codewords are added, this changes: the lowest value of the
* hadamard transform of the sum of six words is seen to be as low as 43 (!),
* which is way less than 12*6.
*
* It is possible that there are more errors, but the word is still uniquely
* decodable: we found a word with distance of 50 from the nearest codeword.
* That means that the highest entry can be as low as 14M.
* Since we have to do binary search, we search for the range 1-64M
* which can be done in 6+l2g(M) steps.
* The binary search is based on (values>32M are unique):
* M 32M min> max> firstStep #steps
* 2 64 1 64 33 +- 16 6
* 4 128 1 128 65 +- 32 7
* 6 192 1 192 129 +- 64 8
* NOTE(review): this table and the sample run below were written for
* M = 2, 4, 6; in this file MULTIPLICITY is CEIL_DIVIDE(PARAM_N2, 128),
* i.e. 5 for PARAM_N2 = 640 - confirm the figures still apply.
*
* As a check, we run a sample for M=6 to see the peak value; it ranged
* from 43 to 147, so my analysis looks right. Also, it shows that decoding
* far beyond the bound of 32M is needed.
*
* For the vectors, it would be tempting to use 8 bit ints,
* because the values "almost" fit in there.
* We could use some trickery to fit it in 8 bits, like saturated add or
* division by 2 in a late step.
* Unfortunately, these instructions do not exist.
* the adds _mm512_adds_epi8 is available only on the latest processors,
* and division, shift, mulhi are not available at all for 8 bits.
* So, we use 16 bit ints.
*
* For the search of the optimal comparison value,
* remember the transform contains 64M-d,
* where d are the distances to the codewords.
* The highest value gives the most likely codeword.
* There is no fast vectorized way to find this value, so we search for the
* maximum value itself.
* In each pass, we collect a bit map of the transform values that are,
* say >bound. There are three cases:
* bit map = 0: all code words are further away than 64M-bound (decrease bound)
* bit map has one bit: one unique code word has distance < 64M-bound
* bit map has multiple bits: multiple words (increase bound)
* We will search for the lowest value of bound that gives a nonzero bit map.
*
* @param[in] transform Structure that contain the expanded codeword
* (8 vectors of 16 signed 16-bit Hadamard coefficients)
* @returns The decoded message byte: bits 0-3 are the column of the peak,
* bits 4-6 its row, and bit 7 is set when the peak value is positive
*/
inline uint32_t find_peaks(__m256i *transform) {
// a whole lot of vector variables
__m256i bitmap, abs_rows[8], bound, active_row, max_abs_rows;
__m256i tmp = _mm256_setzero_si256();
__m256i vect_mask;
__m256i res;
int32_t lower;
int32_t width;
uint32_t message;
uint32_t mask;
int8_t index;
int8_t abs_value;
int8_t mask1;
int8_t mask2;
uint16_t result;

// compute absolute value of transform
for (size_t i = 0; i < 8; i++) {
abs_rows[i] = _mm256_abs_epi16(transform[i]);
}
// compute a vector of 16 elements which contains the maximum somewhere
// (later used to compute bits 0 through 3 of message)
max_abs_rows = abs_rows[0];
for (size_t i = 1; i < 8; i++) {
max_abs_rows = _mm256_max_epi16(max_abs_rows, abs_rows[i]);
}

// do binary search for the highest value that is lower than the maximum
// loop invariant: lower gives bit map = 0, lower + width gives bit map > 0
lower = 1;
// this gives 64, 128 or 256 for MULTIPLICITY = 2, 4, 6
// (and 128 for MULTIPLICITY = 5, the value resulting from PARAM_N2 = 640)
width = 1 << (5 + MULTIPLICITY / 2);
// if you don't unroll this loop, it fits in the loop cache
// uncomment the line below to speed up the program by a few percent
// #pragma GCC unroll 0
while (width > 1) {
width >>= 1;
// compare with lower + width; put result in bitmap
// make vector from value of new bound
bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width));
bitmap = _mm256_cmpgt_epi16(max_abs_rows, bound);
// step up if there are any matches
// testz yields 1 when the bitmap is empty; the next line turns that
// into mask = 0 (no match) or mask = 0xFFFFFFFF (match) without a branch
mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
lower += mask & width;
}
// lower+width contains the maximum value of the vector
// or less, if the maximum is very high (which is OK)
// normally, there is one maximum, but sometimes there are more
// find where the maxima occur in the maximum vector
// (each determines lower 4 bits of peak position)
// construct vector filled with bound-1
bound = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(lower + width - 1));

// find in which of the 8 groups a maximum occurs to compute bits 4, 5, 6 of message
// rows are scanned from 7 down to 0 and every matching row overwrites the
// choice, so the lowest matching row wins; 0x70 (row 7) is the start value
message = 0x70;
for (size_t i = 0; i < 8; i++) {
bitmap = _mm256_cmpgt_epi16(abs_rows[7 - i], bound);
mask = (uint32_t) _mm256_testz_si256(bitmap, bitmap);
mask = ~(uint32_t) ((-(int64_t) mask) >> 63);
message ^= mask & (message ^ ((7 - i) << 4));
}
// we decided which row of the matrix contains the lowest match
// select proper row
index = message >> 4;

// copy abs_rows[index] into tmp without indexing by a data-dependent value:
// every row is read, and all rows but the selected one are masked to zero
tmp = _mm256_setzero_si256();
for (size_t i = 0; i < 8; i++) {
// abs_value = |index - i|, computed without a branch
abs_value = (int8_t)(index - i);
mask1 = abs_value >> 7;
abs_value ^= mask1;
abs_value -= mask1;
// mask2 is 0 when i == index and 1 otherwise, so mask is all ones only for the selected row
mask2 = ((uint8_t) - abs_value >> 7);
mask = (-1ULL) + mask2;
vect_mask = _mm256_set1_epi32(mask);
res = _mm256_and_si256(abs_rows[i], vect_mask);
tmp = _mm256_or_si256(tmp, res);
}

active_row = tmp;

// get the column number of the vector element
// by setting the bits corresponding to the columns
// and then adding elements within two groups of 8
vect_mask = _mm256_cmpgt_epi16(active_row, bound);
vect_mask &= _mm256_set_epi16(-32768, 16384, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1);
for (size_t i = 0; i < 3; i++) {
vect_mask = _mm256_hadd_epi16(vect_mask, vect_mask);
}
// add low 4 bits of message: the index of the lowest set column bit
message |= __tzcnt_u16(_mm256_extract_epi16(vect_mask, 0) + _mm256_extract_epi16(vect_mask, 8));

// set bit 7 if sign of biggest value is positive
// make sure a jump isn't generated by the compiler
// first select the signed row (message / 16), again by masking every row
tmp = _mm256_setzero_si256();
for (size_t i = 0; i < 8; i++) {
mask = ~(uint32_t) ((-(int64_t)(i ^ message / 16)) >> 63);
vect_mask = _mm256_set1_epi32(mask);
tmp = _mm256_or_si256(tmp, _mm256_and_si256(vect_mask, transform[i]));
}
// then select the signed entry at column message % 16 the same way
result = 0;
for (size_t i = 0; i < 16; i++) {
mask = ~(uint32_t) ((-(int64_t)(i ^ message % 16)) >> 63);
result |= mask & ((uint16_t *)&tmp)[i];
}
// bit 15 of result is the sign bit of the peak; a clear sign bit sets bit 7 of message
message |= (0x8000 & ~result) >> 8;
return message;
}



/**
 * @brief Encodes the received word
 *
 * The message consists of VEC_N1_SIZE_BYTES bytes. Each byte is encoded
 * into PARAM_N2 bits, i.e. MULTIPLICITY repeats of one 128-bit (16-byte)
 * RM(1,7) codeword.
 *
 * @param[out] cdw Byte array receiving the encoded message
 *                 (16 * MULTIPLICITY bytes are written per message byte)
 * @param[in] msg Byte array of VEC_N1_SIZE_BYTES bytes storing the message
 */
void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        // start of the MULTIPLICITY codewords that belong to message byte i
        uint8_t *const first = &cdw[16 * i * MULTIPLICITY];

        // encode once, then duplicate the 16-byte codeword into the remaining slots
        encode(first, msg[i]);
        for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
            memcpy(first + 16 * copy, first, 16);
        }
    }
}



/**
 * @brief Decodes the received word
 *
 * Decoding uses fast hadamard transform, for a more complete picture on Reed-Muller decoding, see MacWilliams, Florence Jessie, and Neil James Alexander Sloane.
 * The theory of error-correcting codes @cite macwilliams1977theory
 *
 * For every message byte the MULTIPLICITY received copies of its codeword
 * are summed bit by bit, the Hadamard transform of the sums is computed,
 * the first coefficient is re-centred, and the peak of the transform gives
 * the decoded byte.
 *
 * The re-centring uses _mm256_sub_epi16: a plain `-=` on __m256i is a
 * subtraction on 64-bit lanes under the GCC/Clang vector extensions, so a
 * borrow out of coefficient 0 would spill into the neighbouring 16-bit
 * coefficients.
 *
 * @param[out] msg Byte array receiving the VEC_N1_SIZE_BYTES decoded bytes
 * @param[in] cdw Byte array storing the received word
 *                (16 * MULTIPLICITY bytes are read per message byte)
 */
void PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    __m256i expanded[8];
    __m256i transform[8];
    // only the lowest 16-bit lane (coefficient 0) carries the 64 * MULTIPLICITY offset
    const __m256i first_entry_offset = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0,
                                                        0, 0, 0, 0, 0, 0, 0, 64 * MULTIPLICITY);

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        // collect the codewords
        expand_and_sum(expanded, (const uint64_t *)&cdw[16 * i * MULTIPLICITY]);
        // apply hadamard transform
        hadamard(expanded, transform);
        // fix the first entry to get the half Hadamard transform
        transform[0] = _mm256_sub_epi16(transform[0], first_entry_offset);
        // finish the decoding
        msg[i] = (uint8_t) find_peaks(transform);
    }
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-192/avx2/reed_muller.h Целия файл

@@ -0,0 +1,18 @@
#ifndef REED_MULLER_H
#define REED_MULLER_H


/**
* @file reed_muller.h
* @brief Header file of reed_muller.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

// Encodes every byte of msg into repeated 128-bit RM(1,7) codewords stored in cdw
void PQCLEAN_HQCRMRS192_AVX2_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

// Decodes the received word cdw back into message bytes stored in msg
void PQCLEAN_HQCRMRS192_AVX2_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


#endif

+ 476
- 0
src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.c Целия файл

@@ -0,0 +1,476 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include "parsing.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
* @file reed_solomon.c
* Constant time implementation of Reed-Solomon codes
*/


static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
static void compute_roots(uint8_t *error, uint16_t *sigma);
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

/*
* Precomputed powers of alpha in GF(2^8), packed as sixteen 16-bit lanes per
* 256-bit vector (lowest lane first, four lanes per 64-bit initializer).
* Row i (counting from 0), lane j (counting from 0) holds alpha^((i+1)*(j+1)),
* with alpha = 2 reduced modulo PARAM_GF_POLY (0x11D): row 0 reads
* 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1d, ...
* (pattern checked on the first rows only).
* NOTE(review): presumably consumed by compute_syndromes, whose definition is
* not visible here - confirm. The brace initialisation of __m256i relies on
* the compiler's vector-type extension.
*/
static const __m256i alpha_ij256_1[55] = {
{0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087},
{0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006},
{0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035},
{0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014},
{0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be},
{0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078},
{0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3},
{0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d},
{0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed},
{0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e},
{0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054},
{0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4},
{0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5},
{0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062},
{0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064},
{0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051},
{0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045},
{0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb},
{0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083},
{0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020},
{0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d},
{0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0},
{0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee},
{0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba},
{0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e},
{0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb},
{0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9},
{0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd},
{0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec},
{0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9},
{0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052},
{0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1},
{0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1},
{0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc},
{0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c},
{0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2},
{0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048},
{0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016},
{0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad},
{0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074},
{0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9},
{0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025},
{0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 0x00ab00a9005b008c},
{0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de},
{0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f},
{0x00950021005b009f, 0x001f00c2007500f4, 0x00b500d800a70073, 0x0048009600da00fe},
{0x00a5001500710023, 0x00760089000c00eb, 0x0050008000ef00fc, 0x00b0006400520022},
{0x008200a800d90046, 0x001700e70027002c, 0x0061002d002400db, 0x0008005900bf003e},
{0x00c800290043008c, 0x009e00fe003500e9, 0x0078003000eb006e, 0x005a002400e300cc},
{0x001c005500110005, 0x004d00e200c1006c, 0x00df006a00e90064, 0x009c002c00ae0084},
{0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd, 0x000a000100dd0092},
{0x005100e4000d0014, 0x00d100bd00ba0020, 0x003e00de007400f2, 0x00c20026002b003f},
{0x0079007300340028, 0x00e500f800a10074, 0x006600ca00b4008a, 0x00bb006000f7004b},
{0x00c300bf00d00050, 0x00db00c5002f0026, 0x0021006b006000f5, 0x008600c100cf0082},
{0x00ac0091006700a0, 0x0037002e000f00b4, 0x005500e2006a002c, 0x007c00b9002000a7}
};
/*
 * Second lookup table used by compute_syndromes(): 55 rows, each one a 256-bit
 * vector written as four 64-bit words. Row i is multiplied (gf_mul_vect) by the
 * broadcast of cdw[i + 1] and XOR-accumulated into the second 256-bit word of
 * the syndrome array.
 * NOTE(review): read as sixteen little-endian 16-bit lanes, row 0 looks like
 * alpha^17 .. alpha^32 in GF(2^8), i.e. row i presumably holds alpha^((i+1)*j)
 * for j = 17..32 - confirm against the table generator.
 */
static const __m256i alpha_ij256_2[55] = {
{0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x009d00c000600030},
{0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x005f00de00b90069},
{0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x00d900b600df007f},
{0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00850097003b00f8},
{0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x00e600720055004d},
{0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x0082006e009600f1},
{0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x0012009b005900e0},
{0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x0002001b002c00f7},
{0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0027008f00260040},
{0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x00be00a000c1009c},
{0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00af00b1000f005f},
{0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x001700ed001a00b6},
{0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x00d1005200a900ec},
{0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x0019004b009100aa},
{0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0024005900640096},
{0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00040058002400a2},
{0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x004e00980001000b},
{0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00610046006000cd},
{0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x004300f000b900d4},
{0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x002e006700df005e},
{0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00bf0015003b0086},
{0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0032007b00550085},
{0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x004800e0009600d5},
{0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000800fb0059006e},
{0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x009c0074002c00ef},
{0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00c200d4002600fa},
{0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0086006500c1002d},
{0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x005c0088000f0023},
{0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x006300da001a001e},
{0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0064009100a9001a},
{0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x009000c8009100da},
{0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0010009000640063},
{0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0025000800240082},
{0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0099004e00010045},
{0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x001100be0060006c},
{0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x00b800d900b9008f},
{0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x00c600cc00df0028},
{0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00c800b7003b00d3},
{0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x003d0057005500ce},
{0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x002000ac00960084},
{0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x004a00d8005900e5},
{0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x002f000c002c0007},
{0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0022006900260090},
{0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x006d00e100c10002},
{0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x0091003b000f0060},
{0x007200bd00a10098, 0x006b009400830038, 0x0087008a00e3002e, 0x008d00aa001a00d2},
{0x00ff008500e7004e, 0x00d0006f0013008a, 0x00d4003600700072, 0x007a006200a900fe},
{0x006400290086000a, 0x00b8006b0025007d, 0x002f0075003d0096, 0x004000f2009100ed},
{0x00ef003f00ed0099, 0x00e400680069003a, 0x00af0046008e00a7, 0x009400fa0064009a},
{0x00eb003700a900d6, 0x0096002e00fd0060, 0x0033000f000300f4, 0x005e00b4002400ff},
{0x000100dd00920044, 0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd},
{0x00b4000900b30093, 0x003d00e300970065, 0x00310017003c0003, 0x00da00d3006000f3},
{0x006a00b00057004f, 0x00ad000e009a00b6, 0x00a200e400880005, 0x003f001f00b90080},
{0x00b9004000a60092, 0x0075008a00fc003e, 0x008b00c40017000f, 0x000700a800df0025},
{0x00fd0003002400d7, 0x00c100e900ae00a9, 0x0074005900720011, 0x00f400ff003b00be}
};

/**
 * @brief Encodes a message of PARAM_K bytes into a Reed-Solomon codeword of PARAM_N1 bytes
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * we perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
 * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code.
 *
 * The first PARAM_N1 - PARAM_K bytes of cdw hold the shift register (the parity
 * symbols); the message is copied verbatim into the last PARAM_K bytes.
 * The register is cleared here, so cdw does not need to be zeroed by the caller.
 *
 * @param[out] cdw Array of at least PARAM_N1 bytes receiving the encoded message
 * @param[in] msg Array of at least PARAM_K bytes storing the message
 */
void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    size_t i, k;
    uint8_t gate_value = 0;
    uint8_t prev, x;

    // The unions only exist to give the uint16_t arrays __m256i alignment,
    // so that they can be accessed through the __m256i pointers below.
    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } tmp = {0};

    union {
        uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
        __m256i dummy;
    } PARAM_RS_POLY = {{ RS_POLY_COEFS }};

    __m256i *tmp256 = (__m256i *)tmp.arr16;
    __m256i *param256 = (__m256i *)PARAM_RS_POLY.arr16;

    // The feedback loop reads cdw[PARAM_N1 - PARAM_K - 1] before writing it:
    // the shift register has to start from the all-zero state, otherwise stale
    // bytes in the output buffer leak into the parity symbols.
    memset(cdw, 0, PARAM_N1 - PARAM_K);

    for (i = 0; i < PARAM_K; ++i) {
        gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]);

        // gate_value * g(x): the first 32 coefficients are handled by two
        // 16-lane vector multiplications, the remaining ones one at a time.
        tmp256[0] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[0]);
        tmp256[1] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[1]);

        for (size_t j = 32; j < PARAM_G; ++j) {
            tmp.arr16[j] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(gate_value, PARAM_RS_POLY.arr16[j]);
        }

        // Shift the register by one position while adding the feedback term
        prev = 0;
        for (k = 0; k < PARAM_N1 - PARAM_K; k++) {
            x = cdw[k];
            cdw[k] = (uint8_t) (prev ^ tmp.arr16[k]);
            prev = x;
        }
    }

    // Systematic part: the message itself
    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
}



/**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Two 256-bit accumulators of sixteen 16-bit lanes each are filled, i.e. 32
 * syndromes are written. Every syndrome is cdw[0] plus, for each i >= 1,
 * cdw[i] multiplied by the matching lane of row i - 1 of the lookup tables
 * (alpha_ij256_1 for syndromes 0..15, alpha_ij256_2 for syndromes 16..31).
 * The cdw[0] term has coefficient 1 in every syndrome, so BOTH accumulators
 * are seeded with it.
 *
 * The result is written with unaligned stores: syndromes is only guaranteed
 * to have uint16_t alignment.
 *
 * @param[out] syndromes Array of at least 32 elements (2 * PARAM_DELTA) receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    const __m256i first_symbol = _mm256_set1_epi16(cdw[0]);
    __m256i low = first_symbol;   // syndromes 0..15
    __m256i high = first_symbol;  // syndromes 16..31

    for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
        const __m256i symbol = _mm256_set1_epi16(cdw[i + 1]);

        low = _mm256_xor_si256(low, PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(symbol, alpha_ij256_1[i]));
        high = _mm256_xor_si256(high, PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(symbol, alpha_ij256_2[i]));
    }

    _mm256_storeu_si256((__m256i *) syndromes, low);
    _mm256_storeu_si256((__m256i *) (syndromes + 16), high);
}



/**
* @brief Computes the error locator polynomial (ELP) sigma
*
* This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error, Chapter 6 - BCH Codes). <br>
* We use the letter p for rho which is initialized at -1. <br>
* The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
* Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
* sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
* We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
* This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
* and we only need to save its first PARAM_DELTA coefficients (sigma_copy[i - 1] is read for i <= PARAM_DELTA).
*
* @returns the degree of the ELP sigma
* @param[out] sigma Array receiving the ELP. Indices 0..PARAM_DELTA are written, so at least
*                   PARAM_DELTA + 1 entries are required. Entries 1..PARAM_DELTA are updated
*                   with ^=, so the caller must pass them zeroed.
* @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
*/
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
uint16_t deg_sigma = 0;
uint16_t deg_sigma_p = 0;
uint16_t deg_sigma_copy = 0;
uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
uint16_t pp = (uint16_t) -1; // 2*rho
uint16_t d_p = 1;
uint16_t d = syndromes[0];

uint16_t mask1, mask2, mask12;
uint16_t deg_X, deg_X_sigma_p;
uint16_t dd;
uint16_t mu;

uint16_t i;

sigma[0] = 1;
for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
// Save sigma in case we need it to update X_sigma_p
// (2 * PARAM_DELTA bytes, i.e. the first PARAM_DELTA uint16_t coefficients)
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
deg_sigma_copy = deg_sigma;

// dd = d / d_p: scaling factor of the correction term
dd = PQCLEAN_HQCRMRS192_AVX2_gf_mul(d, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(d_p));

// sigma(X) += dd * X^(mu-rho) * sigma_p(X)
for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
sigma[i] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(dd, X_sigma_p[i]);
}

deg_X = mu - pp;
deg_X_sigma_p = deg_X + deg_sigma_p;

// mask1 = 0xffff if(d != 0) and 0 otherwise
mask1 = -((uint16_t) - d >> 15);

// mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

// mask12 = 0xffff if the deg_sigma increased and 0 otherwise
mask12 = mask1 & mask2;
// Branch-free select: deg_sigma = mask12 ? deg_X_sigma_p : deg_sigma
deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

// Last iteration: sigma and its degree are final, skip the bookkeeping for the next step
if (mu == (2 * PARAM_DELTA - 1)) {
break;
}

// Branch-free selects: when the degree increased, the saved sigma becomes the new sigma_p
pp ^= mask12 & (mu ^ pp);
d_p ^= mask12 & (d ^ d_p);
// X_sigma_p <- X * (mask12 ? sigma_copy : X_sigma_p), computed from the top coefficient down
for (i = PARAM_DELTA; i; --i) {
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
}

deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
// Next discrepancy: d = S[mu+1] + sum_i sigma[i] * S[mu+1-i]
d = syndromes[mu + 1];

for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
d ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
}
}

return deg_sigma;
}



/**
* @brief Computes the error polynomial error from the error locator polynomial sigma
*
* See function PQCLEAN_HQCRMRS192_AVX2_fft for more details.
*
* @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
* @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
* @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
*/
static void compute_roots(uint8_t *error, uint16_t *sigma) {
uint16_t w[1 << PARAM_M] = {0};

PQCLEAN_HQCRMRS192_AVX2_fft(w, sigma, PARAM_DELTA + 1);
PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(error, w);
}



/**
 * @brief Computes the polynomial z(x)
 *
 * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
 * Coefficients above the degree of sigma are forced to zero with a mask
 * rather than a branch.
 *
 * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
 * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
 * @param[in] degree Integer that is the degree of polynomial sigma
 * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
 */
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
    z[0] = 1;

    // Start from sigma, keeping only the coefficients up to its degree
    for (size_t idx = 1; idx <= PARAM_DELTA; ++idx) {
        // keep = 0xffff if idx <= degree and 0 otherwise
        const uint16_t keep = (uint16_t) - ((uint16_t) (idx - degree - 1) >> 15);
        z[idx] = keep & sigma[idx];
    }

    // The linear coefficient always receives the first syndrome
    z[1] ^= syndromes[0];

    for (size_t idx = 2; idx <= PARAM_DELTA; ++idx) {
        const uint16_t keep = (uint16_t) - ((uint16_t) (idx - degree - 1) >> 15);

        // syndromes[idx - 1] + sum_{t < idx} sigma[t] * syndromes[idx - t - 1]
        uint16_t acc = syndromes[idx - 1];
        for (size_t t = 1; t < idx; ++t) {
            acc ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[t], syndromes[idx - t - 1]);
        }

        z[idx] ^= keep & acc;
    }
}



/**
* @brief Computes the error values
*
* See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
* All selections are done with masks instead of branches.
*
* @param[out] error_values Array of PARAM_N1 elements receiving the error values; it is updated with +=,
*                          so the caller must pass it zeroed
* @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
* @param[in] error Array of at least PARAM_N1 elements, non-zero at the error positions
*/
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
uint16_t beta_j[PARAM_DELTA] = {0};
uint16_t e_j[PARAM_DELTA] = {0};

uint16_t delta_counter;
uint16_t delta_real_value;
uint16_t found;
uint16_t mask1;
uint16_t mask2;
uint16_t tmp1;
uint16_t tmp2;
uint16_t inverse;
uint16_t inverse_power_j;

// Compute the beta_{j_i} page 31 of the documentation
// The n-th error position i (counted by delta_counter) stores gf_exp[i] in beta_j[n];
// the inner loop touches every slot so that the written index is not revealed by the access pattern.
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; i++) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
beta_j[j] += mask1 & mask2 & gf_exp[i];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
// Number of error positions actually found (at most PARAM_DELTA are recorded)
delta_real_value = delta_counter;

// Compute the e_{j_i} page 31 of the documentation
for (size_t i = 0; i < PARAM_DELTA; ++i) {
tmp1 = 1;
tmp2 = 1;
inverse = PQCLEAN_HQCRMRS192_AVX2_gf_inverse(beta_j[i]);
inverse_power_j = 1;

// tmp1 = z(beta_i^-1), evaluated term by term
for (size_t j = 1; j <= PARAM_DELTA; ++j) {
inverse_power_j = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, inverse);
tmp1 ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, z[j]);
}
// tmp2 = product over the other slots k of (1 + beta_k * beta_i^-1)
for (size_t k = 1; k < PARAM_DELTA; ++k) {
tmp2 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
}
mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
e_j[i] = mask1 & PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(tmp2));
}

// Place the delta e_{j_i} values at the right coordinates of the output vector
// (same masked scan as above, this time scattering e_j back to the error positions)
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; ++i) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
error_values[i] += mask1 & mask2 & e_j[j];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
}



/**
 * @brief Correct the errors
 *
 * Each error value is added (XOR) to the symbol at the same index; only the
 * low 8 bits of an error value end up in the codeword.
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected in place
 * @param[in] error_values Array of PARAM_N1 elements storing the error values
 */
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    size_t pos = 0;

    while (pos < PARAM_N1) {
        cdw[pos] = (uint8_t) (cdw[pos] ^ error_values[pos]);
        ++pos;
    }
}



/**
 * @brief Decodes the received word
 *
 * This function relies on six steps:
 *    <ol>
 *    <li> The first step is the computation of the 2*PARAM_DELTA syndromes.
 *    <li> The second step is the computation of the error-locator polynomial sigma.
 *    <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
 *    <li> The fourth step is the computation of the polynomial z(x).
 *    <li> The fifth step is the computation of the error values.
 *    <li> The sixth step is the correction of the errors in the received polynomial.
 *    </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Array of at least PARAM_K bytes receiving the decoded message
 * @param[in,out] cdw Array of at least PARAM_N1 bytes storing the received word; it is corrected in place
 */
void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    // compute_syndromes() accesses this buffer through a __m256i pointer, so it
    // must have __m256i alignment; a plain uint16_t array does not guarantee it.
    // Same union idiom as in the encoder.
    union {
        uint16_t arr16[2 * PARAM_DELTA];
        __m256i dummy;
    } syndromes = {{0}};
    uint16_t sigma[1 << PARAM_FFT] = {0};
    uint8_t error[1 << PARAM_M] = {0};
    uint16_t z[PARAM_N1] = {0};
    uint16_t error_values[PARAM_N1] = {0};
    uint16_t deg;

    // Calculate the 2*PARAM_DELTA syndromes
    compute_syndromes(syndromes.arr16, cdw);

    // Compute the error locator polynomial sigma
    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
    deg = compute_elp(sigma, syndromes.arr16);

    // Compute the error polynomial error
    compute_roots(error, sigma);

    // Compute the polynomial z(x)
    compute_z_poly(z, sigma, deg, syndromes.arr16);

    // Compute the error values
    compute_error_values(error_values, z, error);

    // Correct the errors
    correct_errors(cdw, error_values);

    // Retrieve the message from the decoded codeword
    // (it follows the PARAM_G - 1 leading symbols of the codeword)
    memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);
}

+ 20
- 0
src/kem/hqc/hqc-rmrs-192/avx2/reed_solomon.h
Файловите разлики са ограничени, защото са твърде много
Целия файл


+ 178
- 0
src/kem/hqc/hqc-rmrs-192/avx2/vector.c Целия файл

@@ -0,0 +1,178 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <immintrin.h>
#include <stdint.h>
#include <string.h>
/**
* @file vector.c
* @brief Implementation of vectors sampling and some utilities for the HQC scheme
*/



/**
* @brief Generates a vector of a given Hamming weight
*
* This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter <b>weight</b>.
* To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follows:
* 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
* 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$
* 3. If \f$ x \geq t\f$, go to 1
* 4. It returns \f$ r = x \mod 70853\f$
*
* The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
* A position that was already drawn is discarded and drawn again.
*
* The selected bits are XORed into v, so v is expected to be zeroed by the caller.
* v is read and written in CEIL_DIVIDE(PARAM_N, 256) chunks of 256 bits and must be at least that large.
* The local buffers hold PARAM_OMEGA_R positions, so weight must not exceed PARAM_OMEGA_R.
*
* @param[in,out] ctx Pointer to the context of the seed expander
* @param[in,out] v Pointer to the array receiving the vector
* @param[in] weight Integer that is the Hamming weight
*/
void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
size_t random_bytes_size = 3 * weight;
uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0};
uint32_t tmp[PARAM_OMEGA_R] = {0};
__m256i bit256[PARAM_OMEGA_R];
__m256i bloc256[PARAM_OMEGA_R];
__m256i posCmp256 = _mm256_set_epi64x(3, 2, 1, 0);
__m256i pos256;
__m256i mask256;
__m256i aux;
__m256i i256;
uint64_t bloc, pos, bit64;
uint8_t inc;
size_t i, j, k;

i = 0;
// j == random_bytes_size forces a refill of rand_bytes on the first pass
j = random_bytes_size;
while (i < weight) {
do {
if (j == random_bytes_size) {
seedexpander(ctx, rand_bytes, random_bytes_size);
j = 0;
}

// Assemble a 24-bit big-endian candidate from three random bytes
tmp[i] = ((uint32_t) rand_bytes[j++]) << 16;
tmp[i] |= ((uint32_t) rand_bytes[j++]) << 8;
tmp[i] |= rand_bytes[j++];

} while (tmp[i] >= UTILS_REJECTION_THRESHOLD);

tmp[i] = tmp[i] % PARAM_N;

// Keep the position only if it differs from all the positions kept so far
inc = 1;
for (k = 0; k < i; k++) {
if (tmp[k] == tmp[i]) {
inc = 0;
}
}
i += inc;
}

for (i = 0; i < weight; i++) {
// we store the bloc number and bit position of each vb[i]
bloc = tmp[i] >> 6;
// index of the 256-bit chunk containing the 64-bit word
bloc256[i] = _mm256_set1_epi64x(bloc >> 2);
// index of the 64-bit word inside that chunk
pos = (bloc & 0x3UL);
pos256 = _mm256_set1_epi64x(pos);
mask256 = _mm256_cmpeq_epi64(pos256, posCmp256);
bit64 = 1ULL << (tmp[i] & 0x3f);
// the bit, present only in the 64-bit lane selected by pos
bit256[i] = _mm256_set1_epi64x(bit64)&mask256;
}

// Every chunk of v is visited and every position is tested against it,
// so that the memory access pattern does not depend on the drawn positions
for (i = 0; i < CEIL_DIVIDE(PARAM_N, 256); i++) {
aux = _mm256_loadu_si256(((__m256i *)v) + i);
i256 = _mm256_set1_epi64x(i);

for (j = 0; j < weight; j++) {
mask256 = _mm256_cmpeq_epi64(bloc256[j], i256);
aux ^= bit256[j] & mask256;
}
_mm256_storeu_si256(((__m256i *)v) + i, aux);
}

}



/**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * VEC_N_SIZE_BYTES bytes are drawn from the seed expander, loaded into the
 * 64-bit words of v, and the last word is masked with RED_MASK to drop the
 * extra bits.
 *
 * @param[in,out] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
 */
void PQCLEAN_HQCRMRS192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};
    uint64_t *const top_word = &v[VEC_N_SIZE_64 - 1];

    seedexpander(ctx, stream, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_AVX2_load8_arr(v, VEC_N_SIZE_64, stream, VEC_N_SIZE_BYTES);

    // Clear the bits beyond the vector dimension in the last word
    *top_word &= RED_MASK;
}



/**
 * @brief Adds two binary vectors (word-wise XOR)
 *
 * @param[out] o Pointer to an array of size words that receives the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Number of 64-bit words in each vector
 */
void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    uint32_t word = 0;

    while (word < size) {
        const uint64_t sum = v1[word] ^ v2[word];
        o[word] = sum;
        ++word;
    }
}



/**
 * @brief Compares two byte vectors without an early exit
 *
 * All size bytes are always examined; the differences are OR-ed together and
 * folded into a single bit.
 *
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Number of bytes to compare
 * @returns 0 if the vectors are equal and 1 otherwise
 */
uint8_t PQCLEAN_HQCRMRS192_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    uint8_t diff = 0;

    for (uint32_t idx = 0; idx < size; ++idx) {
        diff |= (uint8_t) (v1[idx] ^ v2[idx]);
    }

    // 0 - diff has its top bit set exactly when diff is non-zero
    const uint64_t wide = diff;
    return (uint8_t) ((0 - wide) >> 63);
}



/**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When shrinking, VEC_N1N2_SIZE_BYTES bytes are copied and the bits above
 * size_o (modulo 64) are cleared in word VEC_N1N2_SIZE_64 - 1 of the output.
 * Otherwise the CEIL_DIVIDE(size_v, 8) bytes of the input are copied as they are.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
void PQCLEAN_HQCRMRS192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    if (size_o >= size_v) {
        memcpy(o, v, CEIL_DIVIDE(size_v, 8));
        return;
    }

    // Number of unused high bits in the last output word (0 when size_o is a multiple of 64)
    const uint32_t used_bits = size_o % 64;
    const uint32_t surplus = used_bits ? 64 - used_bits : 0;

    memcpy(o, v, VEC_N1N2_SIZE_BYTES);

    // Clear the surplus high bits in one step
    o[VEC_N1N2_SIZE_64 - 1] &= UINT64_MAX >> surplus;
}

+ 27
- 0
src/kem/hqc/hqc-rmrs-192/avx2/vector.h Целия файл

@@ -0,0 +1,27 @@
#ifndef VECTOR_H
#define VECTOR_H


/**
* @file vector.h
* @brief Header file for vector.c
*/
#include "nistseedexpander.h"
#include "randombytes.h"
#include <stdint.h>

// Samples a vector of Hamming weight `weight` using the seed expander ctx; the bits are XORed into v.
void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

// Fills v with a random vector of dimension PARAM_N drawn from the seed expander ctx.
void PQCLEAN_HQCRMRS192_AVX2_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);

// NOTE(review): no definition of this function appears in the vector.c that accompanies
// this header - confirm it is defined elsewhere, otherwise any caller fails at link time.
void PQCLEAN_HQCRMRS192_AVX2_vect_set_random_from_randombytes(uint64_t *v);


// o = v1 XOR v2 over `size` 64-bit words.
void PQCLEAN_HQCRMRS192_AVX2_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

// Returns 0 when the first `size` bytes of v1 and v2 are equal and 1 otherwise.
uint8_t PQCLEAN_HQCRMRS192_AVX2_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

// Copies v (size_v bits) into o, truncating to size_o bits when size_o < size_v.
void PQCLEAN_HQCRMRS192_AVX2_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


#endif

+ 16
- 0
src/kem/hqc/hqc-rmrs-192/clean/CMakeLists.txt Целия файл

@@ -0,0 +1,16 @@
# Source files of the "clean" HQC-RMRS-192 implementation, relative to this directory.
set(
SRC_CLEAN_HQCRMRS192
code.c
fft.c
gf2x.c
gf.c
hqc.c
kem.c
parsing.c
reed_muller.c
reed_solomon.c
vector.c
)

# Hand the source list over to define_kem_alg together with the target name,
# the PQCLEAN_HQCRMRS192_CLEAN identifier and this directory.
# NOTE(review): define_kem_alg is not defined here - presumably provided by a parent CMakeLists.txt.
define_kem_alg(hqcrmrs192_clean
PQCLEAN_HQCRMRS192_CLEAN "${SRC_CLEAN_HQCRMRS192}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 25
- 0
src/kem/hqc/hqc-rmrs-192/clean/api.h Целия файл

@@ -0,0 +1,25 @@
#ifndef PQCLEAN_HQCRMRS192_CLEAN_API_H
#define PQCLEAN_HQCRMRS192_CLEAN_API_H
/**
* @file api.h
* @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
*/

#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_ALGNAME "HQC-RMRS-192"

// All sizes below are in bytes.
#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES 4562
#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_PUBLICKEYBYTES 4522
#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_BYTES 64
#define PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_CIPHERTEXTBYTES 9026

// As a technicality, the public key is appended to the secret key in order to respect the NIST API.
// Without this constraint, PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES would only be the
// difference between the two sizes above, i.e. 4562 - 4522 = 40.

// Generates a key pair: pk receives CRYPTO_PUBLICKEYBYTES bytes, sk receives CRYPTO_SECRETKEYBYTES bytes.
int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

// Encapsulation: writes the ciphertext ct and the shared secret ss for the public key pk.
int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

// Decapsulation: writes the shared secret ss recovered from the ciphertext ct with the secret key sk.
int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


#endif

+ 46
- 0
src/kem/hqc/hqc-rmrs-192/clean/code.c Целия файл

@@ -0,0 +1,46 @@
#include "code.h"
#include "parameters.h"
#include "reed_muller.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <string.h>
/**
* @file code.c
* @brief Implementation of concatenated code
*/



/**
 * @brief Encodes the message m into a code word em using the concatenated code
 *
 * The message is first encoded with the Reed-Solomon code; the resulting
 * codeword is then encoded with the duplicated Reed-Muller code.
 *
 * @param[out] em Pointer to an array that is the concatenated code word
 * @param[in] m Pointer to an array that is the message
 */
void PQCLEAN_HQCRMRS192_CLEAN_code_encode(uint8_t *em, const uint8_t *m) {
    // Outer (Reed-Solomon) codeword, input of the inner code
    uint8_t rs_codeword[VEC_N1_SIZE_BYTES] = {0};

    PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(rs_codeword, m);
    PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(em, rs_codeword);
}



/**
 * @brief Decodes the code word em into a message m using the concatenated code
 *
 * The inner Reed-Muller code is decoded first; its output is then decoded
 * with the Reed-Solomon code.
 *
 * @param[out] m Pointer to an array that is the message
 * @param[in] em Pointer to an array that is the code word
 */
void PQCLEAN_HQCRMRS192_CLEAN_code_decode(uint8_t *m, const uint8_t *em) {
    // Reed-Solomon word recovered by the inner decoder
    uint8_t rs_word[VEC_N1_SIZE_BYTES] = {0};

    PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(rs_word, em);
    PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_decode(m, rs_word);
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-192/clean/code.h Целия файл

@@ -0,0 +1,18 @@
#ifndef CODE_H
#define CODE_H


/**
* @file code.h
* Header file of code.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

// Encodes `message` into the concatenated code word em.
void PQCLEAN_HQCRMRS192_CLEAN_code_encode(uint8_t *em, const uint8_t *message);

// Decodes the code word em into the message m.
void PQCLEAN_HQCRMRS192_CLEAN_code_decode(uint8_t *m, const uint8_t *em);


#endif

+ 351
- 0
src/kem/hqc/hqc-rmrs-192/clean/fft.c Целия файл

@@ -0,0 +1,351 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
#include <string.h>
/**
* @file fft.c
* Implementation of the additive FFT and its transpose.
* This implementation is based on the paper from Gao and Mateer: <br>
* Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
* IEEE Transactions on Information Theory 56 (2010), 6265--6272.
* http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
* and includes improvements proposed by Bernstein, Chou and Schwabe here:
* https://binary.cr.yp.to/mcbits-20130616.pdf
*/


// Forward declarations of the file-local helpers (radix and radix_big call each other).
static void compute_fft_betas(uint16_t *betas);
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size);
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f);
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas);


/**
 * @brief Computes the basis of betas (omitting 1) used in the additive FFT and its transpose
 *
 * The basis is written in decreasing order: 2^(PARAM_M-1), 2^(PARAM_M-2), ..., 2.
 *
 * @param[out] betas Array of size PARAM_M-1
 */
static void compute_fft_betas(uint16_t *betas) {
    uint16_t *out = betas;

    for (size_t shift = PARAM_M - 1; shift > 0; --shift) {
        *out++ = (uint16_t) (1 << shift);
    }
}



/**
 * @brief Computes the subset sums of the given set
 *
 * Entry i of subset_sums is the XOR of the set elements selected by the bits
 * of i (bit b of i selects set[b]). The table is built by doubling: once the
 * first 2^b entries are known, the next 2^b are the same entries with set[b]
 * added.
 *
 * @param[out] subset_sums Array of size 2^set_size receiving the subset sums
 * @param[in] set Array of set_size elements
 * @param[in] set_size Size of the array set
 */
static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, uint16_t set_size) {
    size_t filled = 1;

    subset_sums[0] = 0;

    for (size_t bit = 0; bit < set_size; ++bit) {
        const uint16_t element = set[bit];

        for (size_t low = 0; low < filled; ++low) {
            subset_sums[filled + low] = (uint16_t) (element ^ subset_sums[low]);
        }
        filled <<= 1;
    }
}



/**
* @brief Computes the radix conversion of a polynomial f in GF(2^m)[x]
*
* Computes f0 and f1 such that f(x) = f0(x^2-x) + x.f1(x^2-x)
* as proposed by Bernstein, Chou and Schwabe:
* https://binary.cr.yp.to/mcbits-20130616.pdf
*
* The cases m_f = 1..4 are fully unrolled; larger sizes are delegated to radix_big.
* Within a case the statement order matters: some outputs are computed from
* outputs assigned just before them.
*
* @param[out] f0 Array half the size of f
* @param[out] f1 Array half the size of f
* @param[in] f Array of size a power of 2
* @param[in] m_f 2^{m_f} is the smallest power of 2 greater or equal to the number of coefficients of f
*/
static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
switch (m_f) {
// 16 input coefficients -> 8 + 8 output coefficients
case 4:
f0[4] = f[8] ^ f[12];
f0[6] = f[12] ^ f[14];
f0[7] = f[14] ^ f[15];
f1[5] = f[11] ^ f[13];
f1[6] = f[13] ^ f[14];
f1[7] = f[15];
f0[5] = f[10] ^ f[12] ^ f1[5];
f1[4] = f[9] ^ f[13] ^ f0[5];

f0[0] = f[0];
f1[3] = f[7] ^ f[11] ^ f[15];
f0[3] = f[6] ^ f[10] ^ f[14] ^ f1[3];
f0[2] = f[4] ^ f0[4] ^ f0[3] ^ f1[3];
f1[1] = f[3] ^ f[5] ^ f[9] ^ f[13] ^ f1[3];
f1[2] = f[3] ^ f1[1] ^ f0[3];
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

// 8 input coefficients -> 4 + 4 output coefficients
case 3:
f0[0] = f[0];
f0[2] = f[4] ^ f[6];
f0[3] = f[6] ^ f[7];
f1[1] = f[3] ^ f[5] ^ f[7];
f1[2] = f[5] ^ f[6];
f1[3] = f[7];
f0[1] = f[2] ^ f0[2] ^ f1[1];
f1[0] = f[1] ^ f0[1];
break;

// 4 input coefficients -> 2 + 2 output coefficients
case 2:
f0[0] = f[0];
f0[1] = f[2] ^ f[3];
f1[0] = f[1] ^ f0[1];
f1[1] = f[3];
break;

// 2 input coefficients: f(x) = f[0] + x.f[1]
case 1:
f0[0] = f[0];
f1[0] = f[1];
break;

// Any other size goes through the generic recursive splitting
default:
radix_big(f0, f1, f, m_f);
break;
}
}

/**
 * @brief Generic radix conversion step used by radix() for the sizes it does not unroll
 *
 * f is cut into four quarters of n = 2^(m_f - 2) coefficients. Two half-size
 * polynomials Q and R are built from them, each one is converted recursively
 * with radix(), and the results are interleaved: R provides the low halves of
 * f0 and f1, Q the high halves.
 *
 * @param[out] f0 Array half the size of f
 * @param[out] f1 Array half the size of f
 * @param[in] f Array of 2^m_f coefficients
 * @param[in] m_f log2 of the number of coefficients of f
 */
static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) {
    uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0};
    uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0};

    uint16_t Q0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t Q1[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R0[1 << (PARAM_FFT - 2)] = {0};
    uint16_t R1[1 << (PARAM_FFT - 2)] = {0};

    const size_t quarter = (size_t) 1 << (m_f - 2);
    const size_t quarter_bytes = quarter * sizeof(uint16_t);

    // Q = (top quarter, top quarter), R = lower half of f
    memcpy(Q, f + 3 * quarter, quarter_bytes);
    memcpy(Q + quarter, f + 3 * quarter, quarter_bytes);
    memcpy(R, f, 2 * quarter_bytes);

    for (size_t idx = 0; idx < quarter; ++idx) {
        Q[idx] ^= f[2 * quarter + idx];
        R[quarter + idx] ^= Q[idx];
    }

    radix(Q0, Q1, Q, m_f - 1);
    radix(R0, R1, R, m_f - 1);

    memcpy(f0, R0, quarter_bytes);
    memcpy(f0 + quarter, Q0, quarter_bytes);
    memcpy(f1, R1, quarter_bytes);
    memcpy(f1 + quarter, Q1, quarter_bytes);
}



/**
* @brief Evaluates f at all subset sums of a given set
*
* This function is a subroutine of the function PQCLEAN_HQCRMRS192_CLEAN_fft.
*
* @param[out] w Array receiving the 2^m evaluations
* @param[in,out] f Array of 2^m_f coefficients; it is twisted in place when the last beta is not 1
* @param[in] f_coeffs Number of coefficients of f
* @param[in] m Number of betas
* @param[in] m_f Base-2 logarithm of the size of f: f is processed as an array of 2^m_f coefficients
* @param[in] betas FFT constants
*/
static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) {
uint16_t f0[1 << (PARAM_FFT - 2)] = {0};
uint16_t f1[1 << (PARAM_FFT - 2)] = {0};
uint16_t gammas[PARAM_M - 2] = {0};
uint16_t deltas[PARAM_M - 2] = {0};
uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0};
uint16_t u[1 << (PARAM_M - 2)] = {0};
uint16_t v[1 << (PARAM_M - 2)] = {0};
uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0};

uint16_t beta_m_pow;
size_t i, j, k;
size_t x;

// Step 1
// Base case: f = f[0] + f[1].x is evaluated directly at every subset sum of the betas
if (m_f == 1) {
for (i = 0; i < m; ++i) {
tmp[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], f[1]);
}

w[0] = f[0];
x = 1;
for (j = 0; j < m; ++j) {
for (k = 0; k < x; ++k) {
w[x + k] = w[k] ^ tmp[j];
}
x <<= 1;
}

return;
}

// Step 2: compute g
// Twist: coefficient i of f is multiplied by betas[m - 1]^i (skipped when that beta is 1)
if (betas[m - 1] != 1) {
beta_m_pow = 1;
x = 1;
x <<= m_f;
for (i = 1; i < x; ++i) {
beta_m_pow = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]);
f[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, f[i]);
}
}

// Step 3
radix(f0, f1, f, m_f);

// Step 4: compute gammas and deltas
// gammas[i] = betas[i] / betas[m - 1], deltas[i] = gammas[i]^2 + gammas[i]
for (i = 0; i + 1 < m; ++i) {
gammas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(betas[m - 1]));
deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(gammas[i]) ^ gammas[i];
}

// Compute gammas sums
compute_subset_sums(gammas_sums, gammas, m - 1);

// Step 5
fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas);

// k = 2^(m-1): size of each half of the output
k = 1;
k <<= ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small.
if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant
w[0] = u[0];
w[k] = u[0] ^ f1[0];
for (i = 1; i < k; ++i) {
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], f1[0]);
w[k + i] = w[i] ^ f1[0];
}
} else {
fft_rec(v, f1, f_coeffs / 2, m - 1, m_f - 1, deltas);

// Step 6
// w[i] = u[i] + gammas_sums[i].v[i] and w[k + i] = w[i] + v[i]
memcpy(w + k, v, 2 * k);
w[0] = u[0];
w[k] ^= u[0];
for (i = 1; i < k; ++i) {
w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], v[i]);
w[k + i] ^= w[i];
}
}
}



/**
 * @brief Evaluates f on all field elements using an additive FFT algorithm
 *
 * f_coeffs is the number of coefficients of f (one more than its degree). <br>
 * The FFT proceeds recursively to evaluate f at all subset sums of a basis B. <br>
 * This implementation is based on the paper from Gao and Mateer: <br>
 * Shuhong Gao and Todd Mateer, Additive Fast Fourier Transforms over Finite Fields,
 * IEEE Transactions on Information Theory 56 (2010), 6265--6272.
 * http://www.math.clemson.edu/~sgao/papers/GM10.pdf <br>
 * and includes improvements proposed by Bernstein, Chou and Schwabe here:
 * https://binary.cr.yp.to/mcbits-20130616.pdf <br>
 * Note that on this first call (as opposed to the recursive calls to fft_rec), gammas are equal to betas,
 * meaning the first gammas subset sums are actually the subset sums of betas (except 1). <br>
 * f itself is only read here: it is split into two local halves by radix(), and
 * those halves are the arrays handed to fft_rec.
 *
 * @param[out] w Array of 2^PARAM_M elements receiving the evaluations
 * @param[in] f Array of 2^PARAM_FFT elements
 * @param[in] f_coeffs Number of coefficients of f (i.e. deg(f)+1)
 */
void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) {
    uint16_t betas[PARAM_M - 1] = {0};
    uint16_t betas_sums[1 << (PARAM_M - 1)] = {0};
    uint16_t even_part[1 << (PARAM_FFT - 1)] = {0};
    uint16_t odd_part[1 << (PARAM_FFT - 1)] = {0};
    uint16_t deltas[PARAM_M - 1] = {0};
    uint16_t u[1 << (PARAM_M - 1)] = {0};
    uint16_t v[1 << (PARAM_M - 1)] = {0};
    const size_t half = (size_t) 1 << (PARAM_M - 1);

    // Follows Gao and Mateer algorithm.
    // Step 1 (PARAM_FFT > 1) and step 2 (beta_m = 1) require no work here.
    compute_fft_betas(betas);
    compute_subset_sums(betas_sums, betas, PARAM_M - 1);

    // Step 3: radix conversion
    radix(even_part, odd_part, f, PARAM_FFT);

    // Step 4: deltas[i] = betas[i]^2 + betas[i]
    for (size_t idx = 0; idx < PARAM_M - 1; ++idx) {
        const uint16_t beta = betas[idx];
        deltas[idx] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(beta) ^ beta;
    }

    // Step 5: evaluate both halves recursively
    fft_rec(u, even_part, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);
    fft_rec(v, odd_part, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas);

    // Steps 6 and 7: combine the two halves into the 2^PARAM_M evaluations
    memcpy(w + half, v, half * sizeof(uint16_t));

    // Evaluation at 0
    w[0] = u[0];

    // Evaluation at 1
    w[half] ^= u[0];

    // Remaining evaluations
    for (size_t idx = 1; idx < half; ++idx) {
        w[idx] = u[idx] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas_sums[idx], v[idx]);
        w[half + idx] ^= w[idx];
    }
}



/**
 * @brief Retrieves the error polynomial from the evaluations w of the ELP (Error Locator Polynomial) on all field elements.
 *
 * For every evaluation that is zero, one bit is XORed into the matching entry of error.
 * The entry index is PARAM_GF_MUL_ORDER minus the discrete log of the evaluation point.
 * Both w[0] and w[2^(PARAM_M-1)] toggle error[0].
 * Entries are XORed, not assigned, so the caller supplies the initial contents of error.
 *
 * @param[in,out] error Array with the error
 * @param[in] w Array of size 2^PARAM_M
 */
void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) {
    uint16_t basis[PARAM_M - 1] = {0};
    uint16_t basis_sums[1 << (PARAM_M - 1)] = {0};
    const size_t half = (size_t) 1 << (PARAM_M - 1);
    size_t j;

    compute_fft_betas(basis);
    compute_subset_sums(basis_sums, basis, PARAM_M - 1);

    // (uint16_t)(-x) >> 15 is 1 for a nonzero field element and 0 for zero,
    // so 1 ^ (...) is the branch-free "x == 0" flag.
    error[0] ^= 1 ^ ((uint16_t) (-w[0]) >> 15);
    error[0] ^= 1 ^ ((uint16_t) (-w[half]) >> 15);

    for (j = 1; j < half; ++j) {
        const uint16_t point = basis_sums[j];
        size_t position;

        position = PARAM_GF_MUL_ORDER - gf_log[point];
        error[position] ^= 1 ^ ((uint16_t) (-w[j]) >> 15);

        position = PARAM_GF_MUL_ORDER - gf_log[point ^ 1];
        error[position] ^= 1 ^ ((uint16_t) (-w[half + j]) >> 15);
    }
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-192/clean/fft.h Целия файл

@@ -0,0 +1,18 @@
#ifndef FFT_H
#define FFT_H


/**
* @file fft.h
* Header file of fft.c
*/

#include <stddef.h>
#include <stdint.h>

/**
* @brief Additive FFT: evaluates the polynomial f and writes the evaluations to w.
* @param[out] w Output array of evaluations
* @param[in] f Coefficients of the polynomial
* @param[in] f_coeffs Number of coefficients of f
*/
void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs);

/**
* @brief Derives the error vector from the evaluations w produced by the FFT.
* @param[in,out] error Error array that is updated
* @param[in] w Evaluations produced by PQCLEAN_HQCRMRS192_CLEAN_fft
*/
void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w);


#endif

+ 63
- 0
src/kem/hqc/hqc-rmrs-192/clean/gf.c Целия файл

@@ -0,0 +1,63 @@
#include "gf.h"
#include "parameters.h"
#include <stdint.h>
/**
* @file gf.c
* Galois field implementation with multiplication using lookup tables
*/


/**
 * @brief Multiplies two elements of GF(2^PARAM_M) using the log/exp tables
 *
 * A zero operand is handled without branching: the table lookup is still
 * performed, and the result is masked to 0 when either operand is 0.
 *
 * @returns the product a*b (0 if a or b is 0)
 * @param[in] a First element of GF(2^PARAM_M) to multiply
 * @param[in] b Second element of GF(2^PARAM_M) to multiply
 */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mul(uint16_t a, uint16_t b) {
    // Each mask is 0xffff when its operand is nonzero and 0 otherwise.
    const uint16_t a_nonzero = (uint16_t) (-((int32_t) a) >> 31);
    const uint16_t b_nonzero = (uint16_t) (-((int32_t) b) >> 31);
    const uint16_t keep = a_nonzero & b_nonzero;
    const uint16_t log_sum = PQCLEAN_HQCRMRS192_CLEAN_gf_mod(gf_log[a] + gf_log[b]);

    return keep & gf_exp[log_sum];
}



/**
 * @brief Squares an element of GF(2^PARAM_M)
 *
 * The lookup is always performed; the result is masked to 0 when a is 0,
 * because gf_log[0] is only a placeholder value.
 *
 * @returns a^2
 * @param[in] a Element of GF(2^PARAM_M)
 */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_square(uint16_t a) {
    // Unsigned mask, as in gf_mul: 0xffff when a != 0, 0 otherwise.
    // Keeping it unsigned avoids converting 0xffff to a signed 16-bit type.
    uint16_t mask = (uint16_t) (-((int32_t) a) >> 31);
    return mask & gf_exp[PQCLEAN_HQCRMRS192_CLEAN_gf_mod(2 * gf_log[a])];
}



/**
 * @brief Computes the inverse of an element of GF(2^PARAM_M)
 *
 * The inverse is looked up as gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]].
 * For a == 1 this reads index PARAM_GF_MUL_ORDER, one of the wrap-around
 * entries of gf_exp. For a == 0 the result is masked to 0.
 *
 * @returns the inverse of a (0 if a is 0)
 * @param[in] a Element of GF(2^PARAM_M)
 */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(uint16_t a) {
    // Unsigned mask, as in gf_mul: 0xffff when a != 0, 0 otherwise.
    // Keeping it unsigned avoids converting 0xffff to a signed 16-bit type.
    uint16_t mask = (uint16_t) (-((int32_t) a) >> 31);
    return mask & gf_exp[PARAM_GF_MUL_ORDER - gf_log[a]];
}



/**
 * @brief Returns i modulo 2^PARAM_M-1
 *
 * i must be less than 2*(2^PARAM_M-1), so a single conditional subtraction
 * is enough. It is done without branching: the order is subtracted
 * unconditionally and added back when the subtraction wrapped around.
 *
 * @returns i mod (2^PARAM_M-1)
 * @param[in] i The integer whose modulo is taken
 */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mod(uint16_t i) {
    const uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // Top bit of the wrapped difference is set exactly when i < PARAM_GF_MUL_ORDER.
    const uint16_t wrapped = (uint16_t) (reduced >> 15);
    const uint16_t add_back = (uint16_t) ((uint16_t) (-wrapped) & PARAM_GF_MUL_ORDER);

    return (uint16_t) (reduced + add_back);
}

+ 39
- 0
src/kem/hqc/hqc-rmrs-192/clean/gf.h Целия файл

@@ -0,0 +1,39 @@
#ifndef GF_H
#define GF_H


/**
* @file gf.h
* Header file of gf.c
*/

#include <stddef.h>
#include <stdint.h>


/**
* Powers of the root alpha of 1 + x^2 + x^3 + x^4 + x^8.
* The last two elements are needed by the PQCLEAN_HQCRMRS192_CLEAN_gf_mul function
* (for example if both elements to multiply are zero).
* The trailing entries 1, 2, 4 repeat the leading entries of the table, so an index
* at or slightly past the multiplicative order still resolves to the right power;
* PQCLEAN_HQCRMRS192_CLEAN_gf_inverse indexes the table at PARAM_GF_MUL_ORDER when gf_log[a] is 0.
*/
static const uint16_t gf_exp[258] = { 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240, 253, 231, 211, 187, 107, 214, 177, 127, 254, 225, 223, 163, 91, 182, 113, 226, 217, 175, 67, 134, 17, 34, 68, 136, 13, 26, 52, 104, 208, 189, 103, 206, 129, 31, 62, 124, 248, 237, 199, 147, 59, 118, 236, 197, 151, 51, 102, 204, 133, 23, 46, 92, 184, 109, 218, 169, 79, 158, 33, 66, 132, 21, 42, 84, 168, 77, 154, 41, 82, 164, 85, 170, 73, 146, 57, 114, 228, 213, 183, 115, 230, 209, 191, 99, 198, 145, 63, 126, 252, 229, 215, 179, 123, 246, 241, 255, 227, 219, 171, 75, 150, 49, 98, 196, 149, 55, 110, 220, 165, 87, 174, 65, 130, 25, 50, 100, 200, 141, 7, 14, 28, 56, 112, 224, 221, 167, 83, 166, 81, 162, 89, 178, 121, 242, 249, 239, 195, 155, 43, 86, 172, 69, 138, 9, 18, 36, 72, 144, 61, 122, 244, 245, 247, 243, 251, 235, 203, 139, 11, 22, 44, 88, 176, 125, 250, 233, 207, 131, 27, 54, 108, 216, 173, 71, 142, 1, 2, 4 };



/**
* Logarithm of elements of GF(2^8) to the base alpha (root of 1 + x^2 + x^3 + x^4 + x^8).
* The logarithm of 0 is set to 0 by convention.
* Note that gf_log[1] is 0 as well, so the table alone cannot tell 0 from 1;
* the gf_* functions in gf.c mask their result to handle a zero operand.
*/
static const uint16_t gf_log[256] = { 0, 0, 1, 25, 2, 50, 26, 198, 3, 223, 51, 238, 27, 104, 199, 75, 4, 100, 224, 14, 52, 141, 239, 129, 28, 193, 105, 248, 200, 8, 76, 113, 5, 138, 101, 47, 225, 36, 15, 33, 53, 147, 142, 218, 240, 18, 130, 69, 29, 181, 194, 125, 106, 39, 249, 185, 201, 154, 9, 120, 77, 228, 114, 166, 6, 191, 139, 98, 102, 221, 48, 253, 226, 152, 37, 179, 16, 145, 34, 136, 54, 208, 148, 206, 143, 150, 219, 189, 241, 210, 19, 92, 131, 56, 70, 64, 30, 66, 182, 163, 195, 72, 126, 110, 107, 58, 40, 84, 250, 133, 186, 61, 202, 94, 155, 159, 10, 21, 121, 43, 78, 212, 229, 172, 115, 243, 167, 87, 7, 112, 192, 247, 140, 128, 99, 13, 103, 74, 222, 237, 49, 197, 254, 24, 227, 165, 153, 119, 38, 184, 180, 124, 17, 68, 146, 217, 35, 32, 137, 46, 55, 63, 209, 91, 149, 188, 207, 205, 144, 135, 151, 178, 220, 252, 190, 97, 242, 86, 211, 171, 20, 42, 93, 158, 132, 60, 57, 83, 71, 109, 65, 162, 31, 45, 67, 216, 183, 123, 164, 118, 196, 23, 73, 236, 127, 12, 111, 246, 108, 161, 59, 82, 41, 157, 85, 170, 251, 96, 134, 177, 187, 204, 62, 90, 203, 89, 95, 176, 156, 169, 160, 81, 11, 245, 22, 235, 122, 117, 44, 215, 79, 174, 213, 233, 230, 231, 173, 232, 116, 214, 244, 234, 168, 80, 88, 175 };


/** Product a*b in GF(2^PARAM_M); the result is 0 when either operand is 0. */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mul(uint16_t a, uint16_t b);

/** Square of a in GF(2^PARAM_M). */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_square(uint16_t a);

/** Inverse of a in GF(2^PARAM_M). */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(uint16_t a);

/** i modulo 2^PARAM_M-1, for i less than 2*(2^PARAM_M-1). */
uint16_t PQCLEAN_HQCRMRS192_CLEAN_gf_mod(uint16_t i);


#endif

+ 154
- 0
src/kem/hqc/hqc-rmrs-192/clean/gf2x.c Целия файл

@@ -0,0 +1,154 @@
#include "gf2x.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include <stdint.h>
/**
* \file gf2x.c
* \brief Implementation of multiplication of two polynomials
*/


static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2);
static void reduce(uint64_t *o, const uint64_t *a);
static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);

/**
 * @brief Exchanges two entries of a table
 *
 * After the call tab[elt1] holds the former tab[elt2] and vice versa.
 * Passing the same index twice leaves the table unchanged.
 *
 * @param[in,out] tab Pointer to the table
 * @param[in] elt1 Index of the first element
 * @param[in] elt2 Index of the second element
 */
static inline void swap(uint16_t *tab, uint16_t elt1, uint16_t elt2) {
    uint16_t *first = tab + elt1;
    uint16_t *second = tab + elt2;
    const uint16_t held = *second;

    *second = *first;
    *first = held;
}



/**
 * @brief Compute o(x) = a(x) mod \f$ X^n - 1\f$
 *
 * The part of a(x) at bit position PARAM_N and above is shifted down by
 * PARAM_N bits and XORed onto the low part. Each output word therefore
 * combines a[i] with pieces of a[i + VEC_N_SIZE_64 - 1] and a[i + VEC_N_SIZE_64].
 * The top word is finally masked with RED_MASK.
 *
 * @param[out] o Pointer to the result (VEC_N_SIZE_64 words)
 * @param[in] a Pointer to the polynomial a(x); words up to index 2 * VEC_N_SIZE_64 - 1 are read
 */
static void reduce(uint64_t *o, const uint64_t *a) {
    const unsigned int bits_in_top_word = PARAM_N & 63;
    const unsigned int complement = 64 - bits_in_top_word;
    size_t word;

    for (word = 0; word < VEC_N_SIZE_64; word++) {
        const uint64_t from_below = a[word + VEC_N_SIZE_64 - 1] >> bits_in_top_word;
        const uint64_t from_above = (uint64_t) (a[word + VEC_N_SIZE_64] << complement);

        o[word] = a[word] ^ from_below ^ from_above;
    }

    o[VEC_N_SIZE_64 - 1] &= RED_MASK;
}



/**
* @brief computes product of the sparse polynomial a1 with the dense polynomial a2(x)
*
* o(x) = a1(x)a2(x), without reduction.
*
* A table of 16 copies of a2, shifted left by 0..15 bits, is built first. Each monomial
* of a1 then selects one copy (low 4 bits of its degree) and a 16-bit-granular output
* offset (remaining bits), and the copy is XORed into o at that offset.
* Both the slot order of the table and the order in which the monomials are processed
* are shuffled with values drawn from ctx.
*
* The result is accumulated with XOR, so o must be zeroed by the caller. weight must not
* exceed PARAM_OMEGA_E, the size of the local permutation buffers.
*
* @param[in,out] o Pointer to the result, as a byte string
* @param[in] a1 Pointer to the sparse polynomial (list of degrees of the monomials which appear in a1)
* @param[in] a2 Pointer to the dense polynomial a2(x)
* @param[in] weight Hamming weight of the sparse polynomial a1
* @param[in] ctx Pointer to a seed expander used to randomize the multiplication process
*/
static void fast_convolution_mult(uint8_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
uint64_t carry;
uint32_t dec, s;
uint64_t table[16 * (VEC_N_SIZE_64 + 1)];
uint16_t permuted_table[16];
uint16_t permutation_table[16];
uint16_t permuted_sparse_vect[PARAM_OMEGA_E];
uint16_t permutation_sparse_vect[PARAM_OMEGA_E];
uint64_t tmp;
uint64_t *pt;
uint8_t *res;
size_t i, j;

// Start from the identity mapping shift -> table slot ...
for (i = 0; i < 16; i++) {
permuted_table[i] = (uint16_t) i;
}

seedexpander(ctx, (uint8_t *) permutation_table, 16 * sizeof(uint16_t));

// ... and shuffle it: position i is exchanged with a position chosen in [i, 15]
for (i = 0; i < 15; i++) {
swap(permuted_table + i, 0, permutation_table[i] % (16 - i));
}

// Slot for shift 0: plain copy of a2, with one extra zero word on top
pt = table + (permuted_table[0] * (VEC_N_SIZE_64 + 1));
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = a2[j];
}
pt[VEC_N_SIZE_64] = 0x0;

// Slots for shifts 1..15: a2 shifted left by i bits, the overflow goes to the extra word
for (i = 1; i < 16; i++) {
carry = 0;
pt = table + (permuted_table[i] * (VEC_N_SIZE_64 + 1));
for (j = 0; j < VEC_N_SIZE_64; j++) {
pt[j] = (a2[j] << i) ^ carry;
carry = (a2[j] >> ((64 - i)));
}
pt[VEC_N_SIZE_64] = carry;
}

// Same shuffle for the order in which the monomials of a1 are visited
for (i = 0; i < weight; i++) {
permuted_sparse_vect[i] = (uint16_t) i;
}

seedexpander(ctx, (uint8_t *) permutation_sparse_vect, weight * sizeof(uint16_t));

for (i = 0; i + 1 < weight; i++) {
swap(permuted_sparse_vect + i, 0, (uint16_t) (permutation_sparse_vect[i] % (weight - i)));
}

for (i = 0; i < weight; i++) {
// Degree = 16 * s + dec: dec selects the pre-shifted copy, s the offset in 16-bit (2-byte) units
dec = a1[permuted_sparse_vect[i]] & 0xf;
s = a1[permuted_sparse_vect[i]] >> 4;
res = o + 2 * s;
pt = table + (permuted_table[dec] * (VEC_N_SIZE_64 + 1));

// The output offset is only 2-byte aligned, hence the byte-wise load8/store8 accessors
for (j = 0; j < VEC_N_SIZE_64 + 1; j++) {
tmp = PQCLEAN_HQCRMRS192_CLEAN_load8(res);
PQCLEAN_HQCRMRS192_CLEAN_store8(res, tmp ^ pt[j]);
res += 8;
}
}
}



/**
* @brief Multiply two polynomials modulo \f$ X^n - 1\f$.
*
* This function multiplies a sparse polynomial <b>a1</b> (of Hamming weight equal to <b>weight</b>)
* and a dense polynomial <b>a2</b>. The multiplication is done modulo \f$ X^n - 1\f$.
*
* The unreduced product is accumulated as bytes in a zero-initialized scratch buffer,
* converted in place to 64-bit words, and then reduced into o.
*
* @param[out] o Pointer to the result
* @param[in] a1 Pointer to the sparse polynomial
* @param[in] a2 Pointer to the dense polynomial
* @param[in] weight Integer that is the weight of the sparse polynomial
* @param[in] ctx Pointer to the randomness context
*/
void PQCLEAN_HQCRMRS192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx) {
// Must start zeroed: fast_convolution_mult XORs into it
uint64_t tmp[2 * VEC_N_SIZE_64 + 1] = {0};

fast_convolution_mult((uint8_t *) tmp, a1, a2, weight, ctx);
// In-place conversion of the byte string written above into 64-bit words
PQCLEAN_HQCRMRS192_CLEAN_load8_arr(tmp, 2 * VEC_N_SIZE_64 + 1, (uint8_t *) tmp, sizeof(tmp));
reduce(o, tmp);
}

+ 16
- 0
src/kem/hqc/hqc-rmrs-192/clean/gf2x.h Целия файл

@@ -0,0 +1,16 @@
#ifndef GF2X_H
#define GF2X_H


/**
* @file gf2x.h
* @brief Header file for gf2x.c
*/
#include "nistseedexpander.h"
#include "randombytes.h"
#include <stdint.h>

/**
* @brief Multiplies the sparse polynomial a1 by the dense polynomial a2 modulo X^n - 1.
* @param[out] o Pointer to the result
* @param[in] a1 Sparse polynomial
* @param[in] a2 Dense polynomial
* @param[in] weight Weight of the sparse polynomial
* @param[in] ctx Seed expander handed to the multiplication
*/
void PQCLEAN_HQCRMRS192_CLEAN_vect_mul(uint64_t *o, const uint32_t *a1, const uint64_t *a2, uint16_t weight, AES_XOF_struct *ctx);


#endif

+ 144
- 0
src/kem/hqc/hqc-rmrs-192/clean/hqc.c Целия файл

@@ -0,0 +1,144 @@
#include "code.h"
#include "gf2x.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
/**
* @file hqc.c
* @brief Implementation of hqc.h
*/



/**
* @brief Keygen of the HQC_PKE IND_CPA scheme
*
* The public key is composed of the syndrome <b>s</b> as well as the <b>seed</b> used to generate the vector <b>h</b>.
*
* The secret key is composed of the <b>seed</b> used to generate vectors <b>x</b> and <b>y</b>.
* As a technicality, the public key is appended to the secret key in order to respect NIST API.
*
* The order of the calls below matters: the random source and the seed expanders are stateful.
*
* @param[out] pk String containing the public key
* @param[out] sk String containing the secret key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk) {
AES_XOF_struct sk_seedexpander;
AES_XOF_struct pk_seedexpander;
uint8_t sk_seed[SEED_BYTES] = {0};
uint8_t pk_seed[SEED_BYTES] = {0};
uint64_t x[VEC_N_SIZE_64] = {0};
uint32_t y[PARAM_OMEGA] = {0};
uint64_t h[VEC_N_SIZE_64] = {0};
uint64_t s[VEC_N_SIZE_64] = {0};

// Create seed_expanders for public key and secret key
// Each seed buffer is split at byte 32 into the two inputs of seedexpander_init
// (presumably a 32-byte seed followed by a diversifier - confirm against nistseedexpander.h)
randombytes(sk_seed, SEED_BYTES);
seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

randombytes(pk_seed, SEED_BYTES);
seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH);

// Compute secret key
// x is drawn before y; hqc_secret_key_from_string replays the same two calls from sk_seed
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA);
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA);

// Compute public key
// s = x + y.h; the multiplication also draws from sk_seedexpander
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(&pk_seedexpander, h);
PQCLEAN_HQCRMRS192_CLEAN_vect_mul(s, y, h, PARAM_OMEGA, &sk_seedexpander);
PQCLEAN_HQCRMRS192_CLEAN_vect_add(s, x, s, VEC_N_SIZE_64);

// Parse keys to string
// pk must be serialized first because sk embeds a copy of it
PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(pk, pk_seed, s);
PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(sk, sk_seed, pk);

}



/**
* @brief Encryption of the HQC_PKE IND_CPA scheme
*
* The ciphertext is composed of vectors <b>u</b> and <b>v</b>.
*
* All randomness comes from a seed expander initialized from theta, so the output is a
* deterministic function of (m, theta, pk). theta is read at offsets 0 and 32.
*
* @param[out] u Vector u (first part of the ciphertext)
* @param[out] v Vector v (second part of the ciphertext)
* @param[in] m Vector representing the message to encrypt
* @param[in] theta Seed used to derive randomness required for encryption
* @param[in] pk String containing the public key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk) {
AES_XOF_struct seedexpander;
uint64_t h[VEC_N_SIZE_64] = {0};
uint64_t s[VEC_N_SIZE_64] = {0};
uint64_t r1[VEC_N_SIZE_64] = {0};
uint32_t r2[PARAM_OMEGA_R] = {0};
uint64_t e[VEC_N_SIZE_64] = {0};
uint64_t tmp1[VEC_N_SIZE_64] = {0};
uint64_t tmp2[VEC_N_SIZE_64] = {0};

// Create seed_expander from theta
seedexpander_init(&seedexpander, theta, theta + 32, SEEDEXPANDER_MAX_LENGTH);

// Retrieve h and s from public key
PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(h, s, pk);

// Generate r1, r2 and e
// The draw order (r1, r2, e, then the two multiplications) fixes the result for a given theta
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&seedexpander, r1, PARAM_OMEGA_R);
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&seedexpander, r2, PARAM_OMEGA_R);
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&seedexpander, e, PARAM_OMEGA_E);

// Compute u = r1 + r2.h
PQCLEAN_HQCRMRS192_CLEAN_vect_mul(u, r2, h, PARAM_OMEGA_R, &seedexpander);
PQCLEAN_HQCRMRS192_CLEAN_vect_add(u, r1, u, VEC_N_SIZE_64);

// Compute v = m.G by encoding the message
// The codeword is written into v as bytes, converted in place to 64-bit words,
// then resized from PARAM_N1N2 to PARAM_N bits into tmp1
PQCLEAN_HQCRMRS192_CLEAN_code_encode((uint8_t *)v, m);
PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, (uint8_t *)v, VEC_N1N2_SIZE_BYTES);
PQCLEAN_HQCRMRS192_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);

// Compute v = m.G + s.r2 + e
PQCLEAN_HQCRMRS192_CLEAN_vect_mul(tmp2, r2, s, PARAM_OMEGA_R, &seedexpander);
PQCLEAN_HQCRMRS192_CLEAN_vect_add(tmp2, e, tmp2, VEC_N_SIZE_64);
PQCLEAN_HQCRMRS192_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);
// Resize the sum from PARAM_N back to PARAM_N1N2 bits for the output
PQCLEAN_HQCRMRS192_CLEAN_vect_resize(v, PARAM_N1N2, tmp2, PARAM_N);

}



/**
* @brief Decryption of the HQC_PKE IND_CPA scheme
*
* Computes v - u.y and decodes it. Fresh random bytes seed the expander handed to the
* multiplication.
*
* @param[out] m Vector representing the decrypted message
* @param[in] u Vector u (first part of the ciphertext)
* @param[in] v Vector v (second part of the ciphertext)
* @param[in] sk String containing the secret key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk) {
uint8_t pk[PUBLIC_KEY_BYTES] = {0};
uint64_t tmp1[VEC_N_SIZE_64] = {0};
uint64_t tmp2[VEC_N_SIZE_64] = {0};
uint32_t y[PARAM_OMEGA] = {0};
AES_XOF_struct perm_seedexpander;
uint8_t perm_seed[SEED_BYTES] = {0};

// Retrieve x, y, pk from secret key
// Only y is used below: tmp1 (x) is overwritten by the resize, and pk is not read again here
PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(tmp1, y, pk, sk);

randombytes(perm_seed, SEED_BYTES);
seedexpander_init(&perm_seedexpander, perm_seed, perm_seed + 32, SEEDEXPANDER_MAX_LENGTH);

// Compute v - u.y
PQCLEAN_HQCRMRS192_CLEAN_vect_resize(tmp1, PARAM_N, v, PARAM_N1N2);
PQCLEAN_HQCRMRS192_CLEAN_vect_mul(tmp2, y, u, PARAM_OMEGA, &perm_seedexpander);
PQCLEAN_HQCRMRS192_CLEAN_vect_add(tmp2, tmp1, tmp2, VEC_N_SIZE_64);


// Compute m by decoding v - u.y
// tmp1 is reused as a byte buffer for the serialized words handed to the decoder
PQCLEAN_HQCRMRS192_CLEAN_store8_arr((uint8_t *)tmp1, VEC_N_SIZE_BYTES, tmp2, VEC_N_SIZE_64);
PQCLEAN_HQCRMRS192_CLEAN_code_decode(m, (uint8_t *)tmp1);
}

+ 19
- 0
src/kem/hqc/hqc-rmrs-192/clean/hqc.h Целия файл

@@ -0,0 +1,19 @@
#ifndef HQC_H
#define HQC_H


/**
* @file hqc.h
* @brief Functions of the HQC_PKE IND_CPA scheme
*/

#include <stdint.h>

/**
* @brief Generates a key pair.
* @param[out] pk String containing the public key
* @param[out] sk String containing the secret key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(unsigned char *pk, unsigned char *sk);

/**
* @brief Encrypts m under pk, with all randomness derived from theta.
* @param[out] u First part of the ciphertext
* @param[out] v Second part of the ciphertext
* @param[in] m Message to encrypt
* @param[in] theta Seed used to derive the encryption randomness
* @param[in] pk String containing the public key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(uint64_t *u, uint64_t *v, uint8_t *m, unsigned char *theta, const unsigned char *pk);

/**
* @brief Decrypts the ciphertext (u, v) with sk.
* @param[out] m Decrypted message
* @param[in] u First part of the ciphertext
* @param[in] v Second part of the ciphertext
* @param[in] sk String containing the secret key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(uint8_t *m, const uint64_t *u, const uint64_t *v, const unsigned char *sk);


#endif

+ 140
- 0
src/kem/hqc/hqc-rmrs-192/clean/kem.c Целия файл

@@ -0,0 +1,140 @@
#include "api.h"
#include "fips202.h"
#include "hqc.h"
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "sha2.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file kem.c
* @brief Implementation of api.h
*/



/**
* @brief Keygen of the HQC_KEM IND-CCA2 scheme
*
* The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>.
*
* The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
* As a technicality, the public key is appended to the secret key in order to respect NIST API.
*
* This is a direct wrapper around the PKE key generation.
*
* @param[out] pk String containing the public key
* @param[out] sk String containing the secret key
* @returns 0 (this function has no failure path)
*/
int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) {

PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_keygen(pk, sk);
return 0;
}



/**
 * @brief Encapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * A random message m is drawn and encrypted with randomness theta = sha3_512(m).
 * The ciphertext is (u, v, d) with d = sha512(m), and the shared secret is
 * sha512(m || u || v), where u and v are serialized with store8_arr.
 *
 * @param[out] ct String containing the ciphertext
 * @param[out] ss String containing the shared secret
 * @param[in] pk String containing the public key
 * @returns 0 (this function has no failure path)
 */
int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) {
    uint8_t message[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    unsigned char digest[SHA512_BYTES] = {0};
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char hash_input[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *const u_bytes = hash_input + VEC_K_SIZE_BYTES;
    unsigned char *const v_bytes = u_bytes + VEC_N_SIZE_BYTES;

    // Random message and the encryption randomness derived from it
    randombytes(message, VEC_K_SIZE_BYTES);
    sha3_512(theta, message, VEC_K_SIZE_BYTES);

    // Deterministic encryption of the message under theta
    PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(u, v, message, theta, pk);

    // d = sha512(m), shipped inside the ciphertext
    sha512(digest, message, VEC_K_SIZE_BYTES);

    // Shared secret = sha512(m || u || v)
    memcpy(hash_input, message, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(u_bytes, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(v_bytes, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, hash_input, sizeof hash_input);

    // Serialize (u, v, d) as the ciphertext
    PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(ct, u, v, digest);

    return 0;
}



/**
 * @brief Decapsulation of the HQC_KEM IND-CCA2 scheme
 *
 * The ciphertext is decrypted to a candidate message m, which is re-encrypted with
 * theta = sha3_512(m). The shared secret sha512(m || u || v) is computed from the
 * received u and v. If the re-encryption (u', v') or sha512(m) differs from the
 * received (u, v, d), the first SHARED_SECRET_BYTES bytes of ss are cleared and -1
 * is returned. The comparison results are combined and applied through a mask, not
 * through a branch on the individual checks.
 *
 * @param[out] ss String containing the shared secret
 * @param[in] ct String containing the ciphertext
 * @param[in] sk String containing the secret key
 * @returns 0 if decapsulation is successful, -1 otherwise
 */
int PQCLEAN_HQCRMRS192_CLEAN_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) {
    // Values parsed from the ciphertext
    uint64_t u[VEC_N_SIZE_64] = {0};
    uint64_t v[VEC_N1N2_SIZE_64] = {0};
    unsigned char d[SHA512_BYTES] = {0};
    // Values recomputed from the decrypted message
    uint64_t u_check[VEC_N_SIZE_64] = {0};
    uint64_t v_check[VEC_N1N2_SIZE_64] = {0};
    unsigned char d_check[SHA512_BYTES] = {0};

    unsigned char pk[PUBLIC_KEY_BYTES] = {0};
    uint8_t message[VEC_K_SIZE_BYTES] = {0};
    uint8_t theta[SHA512_BYTES] = {0};
    unsigned char hash_input[VEC_K_SIZE_BYTES + VEC_N_SIZE_BYTES + VEC_N1N2_SIZE_BYTES] = {0};
    unsigned char *const u_bytes = hash_input + VEC_K_SIZE_BYTES;
    unsigned char *const v_bytes = u_bytes + VEC_N_SIZE_BYTES;
    uint8_t mismatch;
    size_t i;

    // Retrieving u, v and d from ciphertext
    PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(u, v, d, ct);

    // The public key is stored in sk right after the secret seed
    memcpy(pk, sk + SEED_BYTES, PUBLIC_KEY_BYTES);

    // Decrypting
    PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_decrypt(message, u, v, sk);

    // Re-deriving theta and re-encrypting the candidate message
    sha3_512(theta, message, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_hqc_pke_encrypt(u_check, v_check, message, theta, pk);

    // Computing d'
    sha512(d_check, message, VEC_K_SIZE_BYTES);

    // Shared secret = sha512(m || u || v), using the received u and v
    memcpy(hash_input, message, VEC_K_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(u_bytes, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(v_bytes, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    sha512(ss, hash_input, sizeof hash_input);

    // Abort if c != c' or d != d'
    mismatch = PQCLEAN_HQCRMRS192_CLEAN_vect_compare((uint8_t *)u, (uint8_t *)u_check, VEC_N_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS192_CLEAN_vect_compare((uint8_t *)v, (uint8_t *)v_check, VEC_N1N2_SIZE_BYTES);
    mismatch |= PQCLEAN_HQCRMRS192_CLEAN_vect_compare(d, d_check, SHA512_BYTES);

    // Stretch the nonzero/zero flag into an all-ones/all-zeros byte mask
    mismatch = (uint8_t) (-((int16_t) mismatch) >> 15);
    for (i = 0; i < SHARED_SECRET_BYTES; i++) {
        ss[i] &= ~mismatch;
    }

    return -(mismatch & 1);
}

+ 98
- 0
src/kem/hqc/hqc-rmrs-192/clean/parameters.h Целия файл

@@ -0,0 +1,98 @@
#ifndef HQC_PARAMETERS_H
#define HQC_PARAMETERS_H


/**
* @file parameters.h
* @brief Parameters of the HQC_KEM IND-CCA2 scheme
*/
#include "api.h"


#define CEIL_DIVIDE(a, b) (((a)+(b)-1)/(b)) /*!< Divide a by b and ceil the result*/

/*
#define PARAM_N Define the parameter n of the scheme
#define PARAM_N1 Define the parameter n1 of the scheme (length of Reed-Solomon code)
#define PARAM_N2 Define the parameter n2 of the scheme (length of Duplicated Reed-Muller code)
#define PARAM_N1N2 Define the length in bits of the Concatenated code
#define PARAM_OMEGA Define the parameter omega of the scheme
#define PARAM_OMEGA_E Define the parameter omega_e of the scheme
#define PARAM_OMEGA_R Define the parameter omega_r of the scheme
#define PARAM_SECURITY Define the security level corresponding to the chosen parameters
#define PARAM_DFR_EXP Define the decryption failure rate corresponding to the chosen parameters

#define SECRET_KEY_BYTES Define the size of the secret key in bytes
#define PUBLIC_KEY_BYTES Define the size of the public key in bytes
#define SHARED_SECRET_BYTES Define the size of the shared secret in bytes
#define CIPHERTEXT_BYTES Define the size of the ciphertext in bytes

#define UTILS_REJECTION_THRESHOLD Define the rejection threshold used to generate given weight vectors (see vector_set_random_fixed_weight function)
#define VEC_N_SIZE_BYTES Define the size of the array used to store a PARAM_N sized vector in bytes
#define VEC_K_SIZE_BYTES Define the size of the array used to store a PARAM_K sized vector in bytes
#define VEC_N1_SIZE_BYTES Define the size of the array used to store a PARAM_N1 sized vector in bytes
#define VEC_N1N2_SIZE_BYTES Define the size of the array used to store a PARAM_N1N2 sized vector in bytes

#define VEC_N_SIZE_64 Define the size of the array used to store a PARAM_N sized vector in 64 bits
#define VEC_K_SIZE_64 Define the size of the array used to store a PARAM_K sized vector in 64 bits
#define VEC_N1_SIZE_64 Define the size of the array used to store a PARAM_N1 sized vector in 64 bits
#define VEC_N1N2_SIZE_64 Define the size of the array used to store a PARAM_N1N2 sized vector in 64 bits

#define PARAM_DELTA Define the parameter delta of the scheme (correcting capacity of the Reed-Solomon code)
#define PARAM_M Define a positive integer
#define PARAM_GF_POLY Generator polynomial of Galois field GF(2^PARAM_M), represented in hexadecimal form
#define PARAM_GF_MUL_ORDER Define the size of the multiplicative group of GF(2^PARAM_M), i.e 2^PARAM_M -1
#define PARAM_K Define the size of the information bits of the Reed-Solomon code
#define PARAM_G Define the size of the generator polynomial of Reed-Solomon code
#define PARAM_FFT The additive FFT takes a 2^PARAM_FFT polynomial as input
We use the FFT to compute the roots of sigma, whose degree is PARAM_DELTA=16
The smallest power of 2 greater than 16+1 is 32=2^5
#define RS_POLY_COEFS Coefficients of the generator polynomial of the Reed-Solomon code

#define RED_MASK A mask for the higher bits of a vector
#define SHA512_BYTES Define the size of SHA512 output in bytes
#define SEED_BYTES Define the size of the seed in bytes
#define SEEDEXPANDER_MAX_LENGTH Define the seed expander max length
*/

#define PARAM_N 35851
#define PARAM_N1 56
#define PARAM_N2 640
#define PARAM_N1N2 35840
#define PARAM_OMEGA 100
#define PARAM_OMEGA_E 114
#define PARAM_OMEGA_R 114
#define PARAM_SECURITY 192
#define PARAM_DFR_EXP 192

#define SECRET_KEY_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_SECRETKEYBYTES
#define PUBLIC_KEY_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_PUBLICKEYBYTES
#define SHARED_SECRET_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_BYTES
#define CIPHERTEXT_BYTES PQCLEAN_HQCRMRS192_CLEAN_CRYPTO_CIPHERTEXTBYTES

#define UTILS_REJECTION_THRESHOLD 16742417
#define VEC_N_SIZE_BYTES CEIL_DIVIDE(PARAM_N, 8)
#define VEC_K_SIZE_BYTES PARAM_K
#define VEC_N1_SIZE_BYTES PARAM_N1
#define VEC_N1N2_SIZE_BYTES CEIL_DIVIDE(PARAM_N1N2, 8)

#define VEC_N_SIZE_64 CEIL_DIVIDE(PARAM_N, 64)
#define VEC_K_SIZE_64 CEIL_DIVIDE(PARAM_K, 8)
#define VEC_N1_SIZE_64 CEIL_DIVIDE(PARAM_N1, 8)
#define VEC_N1N2_SIZE_64 CEIL_DIVIDE(PARAM_N1N2, 64)

#define PARAM_DELTA 16
#define PARAM_M 8
#define PARAM_GF_POLY 0x11D
#define PARAM_GF_MUL_ORDER 255
#define PARAM_K 24
#define PARAM_G 33
#define PARAM_FFT 5
#define RS_POLY_COEFS 45,216,239,24,253,104,27,40,107,50,163,210,227,134,224,158,119,13,158,1,238,164,82,43,15,232,246,142,50,189,29,232,1

/* RED_MASK keeps the low PARAM_N mod 64 = 11 bits of the last 64-bit word of a PARAM_N-bit vector */
#define RED_MASK 0x7ff
#define SHA512_BYTES 64
#define SEED_BYTES 40
#define SEEDEXPANDER_MAX_LENGTH 4294967295

#endif

+ 186
- 0
src/kem/hqc/hqc-rmrs-192/clean/parsing.c Целия файл

@@ -0,0 +1,186 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file parsing.c
* @brief Functions to parse secret key, public key and ciphertext of the HQC scheme
*/


/**
 * @brief Writes a 64-bit word as 8 bytes, least significant byte first
 *
 * @param[out] out Destination buffer of at least 8 bytes
 * @param[in] in Word to serialize
 */
void PQCLEAN_HQCRMRS192_CLEAN_store8(unsigned char *out, uint64_t in) {
    for (size_t byte = 0; byte < 8; byte++) {
        out[byte] = (unsigned char) ((in >> (8 * byte)) & 0xFF);
    }
}


/**
 * @brief Reads 8 bytes as a 64-bit word, least significant byte first
 *
 * @param[in] in Source buffer of at least 8 bytes
 * @returns the word whose byte k (counting from the least significant) is in[k]
 */
uint64_t PQCLEAN_HQCRMRS192_CLEAN_load8(const unsigned char *in) {
    uint64_t value = 0;

    // Walk from the most significant byte down so each step is shift-then-or
    for (size_t byte = 8; byte-- > 0;) {
        value = (value << 8) | in[byte];
    }

    return value;
}

/**
 * @brief Converts a little-endian byte string into an array of 64-bit words
 *
 * Full 8-byte groups are converted with load8 while both input and output space
 * remain. If fewer than 8 input bytes are left and an output word is still free,
 * they are packed into that word with the missing high bytes set to zero.
 * Output words beyond the converted ones are not written.
 *
 * @param[out] out64 Destination words
 * @param[in] outlen Capacity of out64, in words
 * @param[in] in8 Source bytes
 * @param[in] inlen Number of source bytes
 */
void PQCLEAN_HQCRMRS192_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen) {
    size_t consumed = 0;
    size_t produced = 0;

    // Whole words first
    while (produced < outlen && consumed + 8 <= inlen) {
        out64[produced] = PQCLEAN_HQCRMRS192_CLEAN_load8(in8 + consumed);
        consumed += 8;
        produced += 1;
    }

    // Nothing left to read, or nowhere left to write
    if (consumed >= inlen || produced >= outlen) {
        return;
    }

    // Between 1 and 7 trailing bytes: start with the last one and shift the
    // earlier ones in below it. The word is updated in place at every step.
    out64[produced] = in8[inlen - 1];
    for (size_t left = inlen - consumed - 1; left-- > 0;) {
        out64[produced] <<= 8;
        out64[produced] |= in8[consumed + left];
    }
}

/**
 * @brief Converts an array of 64-bit words into a little-endian byte string
 *
 * Byte k of the output is byte (k mod 8) of word (k / 8), counting from the least
 * significant byte. Writing stops when either outlen bytes have been written or
 * all inlen words have been used up.
 *
 * @param[out] out8 Destination bytes
 * @param[in] outlen Capacity of out8, in bytes
 * @param[in] in64 Source words
 * @param[in] inlen Number of source words
 */
void PQCLEAN_HQCRMRS192_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen) {
    for (size_t pos = 0; pos < outlen && pos / 8 < inlen; pos++) {
        const size_t shift = (pos % 8) * 8;

        out8[pos] = (in64[pos / 8] >> shift) & 0xFF;
    }
}


/**
 * @brief Parse a secret key into a string
 *
 * The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
 * As a technicality, the public key is appended to the secret key in order to respect NIST API.
 * Layout: SEED_BYTES bytes of seed followed by PUBLIC_KEY_BYTES bytes of public key.
 *
 * @param[out] sk String containing the secret key
 * @param[in] sk_seed Seed used to generate the secret key
 * @param[in] pk String containing the public key
 */
void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk) {
    uint8_t *const pk_slot = sk + SEED_BYTES;

    memcpy(sk, sk_seed, SEED_BYTES);
    memcpy(pk_slot, pk, PUBLIC_KEY_BYTES);
}

/**
* @brief Parse a secret key from a string
*
* The secret key is composed of the seed used to generate vectors <b>x</b> and <b>y</b>.
* As a technicality, the public key is appended to the secret key in order to respect NIST API.
*
* x and y are not stored in sk: they are regenerated from the seed, in the same order
* (x first, then y) as in key generation.
*
* @param[out] x uint64_t representation of vector x
* @param[out] y uint32_t representation of vector y
* @param[out] pk String containing the public key
* @param[in] sk String containing the secret key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk) {
AES_XOF_struct sk_seedexpander;
uint8_t sk_seed[SEED_BYTES] = {0};

// sk = seed (SEED_BYTES) || pk (PUBLIC_KEY_BYTES)
memcpy(sk_seed, sk, SEED_BYTES);
sk += SEED_BYTES;
memcpy(pk, sk, PUBLIC_KEY_BYTES);

// Replay the key generation draws: the call order must not change
seedexpander_init(&sk_seedexpander, sk_seed, sk_seed + 32, SEEDEXPANDER_MAX_LENGTH);
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(&sk_seedexpander, x, PARAM_OMEGA);
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(&sk_seedexpander, y, PARAM_OMEGA);
}

/**
* @brief Parse a public key into a string
*
* The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>
* Layout: SEED_BYTES bytes of seed followed by VEC_N_SIZE_BYTES bytes of s.
*
* @param[out] pk String containing the public key
* @param[in] pk_seed Seed used to generate the public key
* @param[in] s uint64_t representation of vector s
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s) {
memcpy(pk, pk_seed, SEED_BYTES);
PQCLEAN_HQCRMRS192_CLEAN_store8_arr(pk + SEED_BYTES, VEC_N_SIZE_BYTES, s, VEC_N_SIZE_64);
}



/**
* @brief Parse a public key from a string
*
* The public key is composed of the syndrome <b>s</b> as well as the seed used to generate the vector <b>h</b>
* s is read back from the string; h is regenerated from the seed.
*
* @param[out] h uint64_t representation of vector h
* @param[out] s uint64_t representation of vector s
* @param[in] pk String containing the public key
*/
void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk) {
AES_XOF_struct pk_seedexpander;
uint8_t pk_seed[SEED_BYTES] = {0};

// pk = seed (SEED_BYTES) || s (VEC_N_SIZE_BYTES)
memcpy(pk_seed, pk, SEED_BYTES);
pk += SEED_BYTES;
PQCLEAN_HQCRMRS192_CLEAN_load8_arr(s, VEC_N_SIZE_64, pk, VEC_N_SIZE_BYTES);

seedexpander_init(&pk_seedexpander, pk_seed, pk_seed + 32, SEEDEXPANDER_MAX_LENGTH);
PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(&pk_seedexpander, h);
}


/**
 * @brief Parse a ciphertext into a string
 *
 * The ciphertext is the concatenation of vector <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * vector <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] ct String receiving the ciphertext
 * @param[in] u uint64_t representation of vector u
 * @param[in] v uint64_t representation of vector v
 * @param[in] d String containing the hash d
 */
void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d) {
    uint8_t *const u_dst = ct;
    uint8_t *const v_dst = u_dst + VEC_N_SIZE_BYTES;
    uint8_t *const d_dst = v_dst + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(u_dst, VEC_N_SIZE_BYTES, u, VEC_N_SIZE_64);
    PQCLEAN_HQCRMRS192_CLEAN_store8_arr(v_dst, VEC_N1N2_SIZE_BYTES, v, VEC_N1N2_SIZE_64);
    memcpy(d_dst, d, SHA512_BYTES);
}


/**
 * @brief Parse a ciphertext from a string
 *
 * The ciphertext is the concatenation of vector <b>u</b> (VEC_N_SIZE_BYTES bytes),
 * vector <b>v</b> (VEC_N1N2_SIZE_BYTES bytes) and hash <b>d</b> (SHA512_BYTES bytes).
 *
 * @param[out] u uint64_t representation of vector u
 * @param[out] v uint64_t representation of vector v
 * @param[out] d String receiving the hash d
 * @param[in] ct String containing the ciphertext
 */
void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct) {
    const uint8_t *const u_src = ct;
    const uint8_t *const v_src = u_src + VEC_N_SIZE_BYTES;
    const uint8_t *const d_src = v_src + VEC_N1N2_SIZE_BYTES;

    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(u, VEC_N_SIZE_64, u_src, VEC_N_SIZE_BYTES);
    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N1N2_SIZE_64, v_src, VEC_N1N2_SIZE_BYTES);
    memcpy(d, d_src, SHA512_BYTES);
}

+ 36
- 0
src/kem/hqc/hqc-rmrs-192/clean/parsing.h Целия файл

@@ -0,0 +1,36 @@
#ifndef PARSING_H
#define PARSING_H


/**
 * @file parsing.h
 * @brief Header file for parsing.c
 *
 * Declares the byte <-> 64-bit word conversion helpers and the routines that
 * serialize and parse HQC secret keys, public keys and ciphertexts.
 */

// size_t is used by the *_arr prototypes below; <stdint.h> alone is not guaranteed to define it.
#include <stddef.h>
#include <stdint.h>

void PQCLEAN_HQCRMRS192_CLEAN_store8(unsigned char *out, uint64_t in);

uint64_t PQCLEAN_HQCRMRS192_CLEAN_load8(const unsigned char *in);

void PQCLEAN_HQCRMRS192_CLEAN_load8_arr(uint64_t *out64, size_t outlen, const uint8_t *in8, size_t inlen);

void PQCLEAN_HQCRMRS192_CLEAN_store8_arr(uint8_t *out8, size_t outlen, const uint64_t *in64, size_t inlen);


void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_to_string(uint8_t *sk, const uint8_t *sk_seed, const uint8_t *pk);

void PQCLEAN_HQCRMRS192_CLEAN_hqc_secret_key_from_string(uint64_t *x, uint32_t *y, uint8_t *pk, const uint8_t *sk);


void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_to_string(uint8_t *pk, const uint8_t *pk_seed, const uint64_t *s);

void PQCLEAN_HQCRMRS192_CLEAN_hqc_public_key_from_string(uint64_t *h, uint64_t *s, const uint8_t *pk);


void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_to_string(uint8_t *ct, const uint64_t *u, const uint64_t *v, const uint8_t *d);

void PQCLEAN_HQCRMRS192_CLEAN_hqc_ciphertext_from_string(uint64_t *u, uint64_t *v, uint8_t *d, const uint8_t *ct);


#endif

+ 237
- 0
src/kem/hqc/hqc-rmrs-192/clean/reed_muller.c Целия файл

@@ -0,0 +1,237 @@
#include "parameters.h"
#include "reed_muller.h"
#include <stdint.h>
#include <string.h>
/**
* @file reed_muller.c
* Constant time implementation of Reed-Muller code RM(1,7)
*/



// number of repeated code words
#define MULTIPLICITY CEIL_DIVIDE(PARAM_N2, 128)

// copy bit 0 into all bits of a 32 bit value
#define BIT0MASK(x) (-((x) & 1))


static void encode(uint8_t *word, uint8_t message);
static void hadamard(uint16_t src[128], uint16_t dst[128]);
static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]);
static uint8_t find_peaks(const uint16_t transform[128]);



/**
 * @brief Store a 32-bit value as four bytes, least significant byte first
 *
 * @param[out] out Destination of the four bytes
 * @param[in] value Value to store
 */
static void store_quarter(uint8_t *out, uint32_t value) {
    for (size_t byte = 0; byte < 4; byte++) {
        out[byte] = (value >> (8 * byte)) & 0xff;
    }
}

/**
 * @brief Encode a single byte into a single codeword using RM(1,7)
 *
 * Encoding matrix of this code:
 * bit pattern (note that bits are numbered big endian)
 * 0 aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa
 * 1 cccccccc cccccccc cccccccc cccccccc
 * 2 f0f0f0f0 f0f0f0f0 f0f0f0f0 f0f0f0f0
 * 3 ff00ff00 ff00ff00 ff00ff00 ff00ff00
 * 4 ffff0000 ffff0000 ffff0000 ffff0000
 * 5 ffffffff 00000000 ffffffff 00000000
 * 6 ffffffff ffffffff 00000000 00000000
 * 7 ffffffff ffffffff ffffffff ffffffff
 *
 * The 128-bit codeword is produced as four 32-bit quarters written to
 * word[0..3], word[4..7], word[8..11] and word[12..15].
 *
 * @param[out] word An RM(1,7) codeword (16 bytes)
 * @param[in] message A message
 */
static void encode(uint8_t *word, uint8_t message) {
    // Rows 0..4 of the matrix repeat identically in every 32-bit quarter.
    // (Warning: in the bit matrix above, low bits are at the left!)
    static const uint32_t row_pattern[5] = {
        0xaaaaaaaa, 0xcccccccc, 0xf0f0f0f0, 0xff00ff00, 0xffff0000
    };

    // bit 7 flips all the bits, so start from it
    uint32_t quarter = BIT0MASK(message >> 7);
    for (size_t row = 0; row < 5; row++) {
        quarter ^= BIT0MASK(message >> row) & row_pattern[row];
    }

    // first quarter: neither bit 5 nor bit 6 applies
    store_quarter(word + 0, quarter);

    // bit 5 flips quarters 1 and 3; bit 6 flips quarters 2 and 3.
    // Visiting the quarters in the order 1, 3, 2 needs a single flip per step.
    quarter ^= BIT0MASK(message >> 5);
    store_quarter(word + 4, quarter);

    quarter ^= BIT0MASK(message >> 6);
    store_quarter(word + 12, quarter);

    quarter ^= BIT0MASK(message >> 5);
    store_quarter(word + 8, quarter);
}



/**
 * @brief Hadamard transform
 *
 * Performs the Hadamard transform of src and stores the result in dst.
 * src is overwritten: it is also used as intermediate buffer.
 * The method is best explained with H(3) instead of H(7).
 *
 * The routine multiplies by the matrix H(3):
 *                     [1  1  1  1  1  1  1  1]
 *                     [1 -1  1 -1  1 -1  1 -1]
 *                     [1  1 -1 -1  1  1 -1 -1]
 * [a b c d e f g h] * [1 -1 -1  1  1 -1 -1  1] = result of routine
 *                     [1  1  1  1 -1 -1 -1 -1]
 *                     [1 -1  1 -1 -1  1 -1  1]
 *                     [1  1 -1 -1 -1 -1  1  1]
 *                     [1 -1 -1  1 -1  1  1 -1]
 * This is done in three passes, where each pass sets the lower half of the
 * output buffer to pairwise sums and the upper half to pairwise differences:
 * index     0        1        2        3        4        5        6        7
 * input:    a,       b,       c,       d,       e,       f,       g,       h
 * pass 1:   a+b,     c+d,     e+f,     g+h,     a-b,     c-d,     e-f,     g-h
 * pass 2:   a+b+c+d, e+f+g+h, a-b+c-d, e-f+g-h, a+b-c-d, e+f-g-h, a-b-c+d, e-f-g+h
 * pass 3:   a+b+c+d+e+f+g+h   a+b-c-d+e+f-g-h   a+b+c+d-e-f-g-h   a+b-c-d-e-f+g+h
 *           a-b+c-d+e-f+g-h   a-b-c+d+e-f-g+h   a-b+c-d-e+f-g+h   a-b-c+d-e+f+g-h
 * This order of computation is chosen because it vectorises well.
 * Likewise, this routine multiplies by H(7) in seven passes.
 *
 * @param[in,out] src 128 input values; clobbered, used as scratch space
 * @param[out] dst 128 transformed values
 */
static void hadamard(uint16_t src[128], uint16_t dst[128]) {
    // Even passes read src and write dst, odd passes go the other way.
    // With seven passes the last write lands in dst.
    uint16_t *const buffers[2] = {src, dst};

    for (uint32_t pass = 0; pass < 7; pass++) {
        const uint16_t *in = buffers[pass & 1];
        uint16_t *out = buffers[(pass & 1) ^ 1];

        for (uint32_t pair = 0; pair < 64; pair++) {
            out[pair] = in[2 * pair] + in[2 * pair + 1];
            out[pair + 64] = in[2 * pair] - in[2 * pair + 1];
        }
    }
}



/**
 * @brief Add multiple codewords into expanded codeword
 *
 * Each of the MULTIPLICITY 16-byte codewords in src is expanded to one value per bit
 * (bit b of byte p goes to entry 8 * p + b) and the copies are summed entry by entry.
 * Note: this does not write the codewords as -1 or +1 as the green machine does;
 * instead, just 0 and 1 are used.
 * The resulting hadamard transform has:
 * all values halved,
 * the first entry 64 too high (per summed copy).
 *
 * @param[out] dest Structure that receives the expanded codeword (128 entries)
 * @param[in] src Structure that contains the codewords (16 * MULTIPLICITY bytes)
 */
static void expand_and_sum(uint16_t dest[128], const uint8_t src[16 * MULTIPLICITY]) {
    // the first copy initialises every entry of dest
    for (size_t idx = 0; idx < 128; idx++) {
        dest[idx] = (uint16_t) ((src[idx / 8] >> (idx % 8)) & 1);
    }

    // every further copy is accumulated on top
    for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
        const uint8_t *codeword = src + 16 * copy;
        for (size_t idx = 0; idx < 128; idx++) {
            dest[idx] += (uint16_t) ((codeword[idx / 8] >> (idx % 8)) & 1);
        }
    }
}



/**
 * @brief Finding the location of the highest value
 *
 * This is the final step of the green machine: find the location of the entry with the
 * largest absolute value, and add 128 if that peak is positive (top bit clear).
 * If several entries share the largest absolute value, the one with the smallest index
 * is taken. All selections are done with masks rather than branches.
 *
 * @param[in] transform Structure that contains the transformed codeword (128 entries,
 *                      interpreted as 16-bit two's complement values)
 * @returns the index of the peak in the low 7 bits, with bit 7 set when the peak is positive
 */
static uint8_t find_peaks(const uint16_t transform[128]) {
    uint16_t best_magnitude = 0;
    uint16_t best_value = 0;
    uint16_t best_index = 0;

    for (uint16_t i = 0; i < 128; i++) {
        const uint16_t value = transform[i];
        const uint16_t negated = (uint16_t) -value;
        // 0xffff when the top bit of value is set, 0 otherwise
        const uint16_t is_negative = (uint16_t) -(value >> 15);
        // picks negated when is_negative is set, value otherwise
        const uint16_t magnitude = value ^ (is_negative & (value ^ negated));
        // 0xffff when magnitude is strictly larger than the best one so far
        const uint16_t replace = (uint16_t) -(((uint16_t) (best_magnitude - magnitude)) >> 15);

        best_value = (uint16_t) ((replace & value) | (~replace & best_value));
        best_index = (uint16_t) ((replace & i) | (~replace & best_index));
        best_magnitude = (uint16_t) ((replace & magnitude) | (~replace & best_magnitude));
    }

    // set bit 7 when the peak value has its top bit clear
    best_index |= 128 & ((best_value >> 15) - 1);
    return (uint8_t) best_index;
}




/**
 * @brief Encodes the received word
 *
 * The message consists of VEC_N1_SIZE_BYTES bytes; each byte is encoded into one 16-byte
 * RM(1,7) codeword which is then repeated MULTIPLICITY times in the output.
 *
 * @param[out] cdw Byte array receiving the encoded message (16 * MULTIPLICITY bytes per message byte)
 * @param[in] msg Byte array of VEC_N1_SIZE_BYTES bytes storing the message
 */
void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg) {
    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        uint8_t *const first = cdw + 16 * i * MULTIPLICITY;

        // encode the byte once ...
        encode(first, msg[i]);

        // ... then replicate the codeword into the remaining slots
        for (size_t copy = 1; copy < MULTIPLICITY; copy++) {
            memcpy(first + 16 * copy, first, 16);
        }
    }
}



/**
 * @brief Decodes the received word
 *
 * Decoding uses the fast Hadamard transform; for a more complete picture on Reed-Muller decoding,
 * see MacWilliams, Florence Jessie, and Neil James Alexander Sloane,
 * The theory of error-correcting codes @cite macwilliams1977theory
 *
 * @param[out] msg Byte array receiving the VEC_N1_SIZE_BYTES decoded bytes
 * @param[in] cdw Byte array storing the received word (16 * MULTIPLICITY bytes per decoded byte)
 */
void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw) {
    uint16_t summed[128];
    uint16_t spectrum[128];

    for (size_t i = 0; i < VEC_N1_SIZE_BYTES; i++) {
        const uint8_t *const copies = cdw + 16 * i * MULTIPLICITY;

        // collect the repeated codewords into one array of per-bit sums
        expand_and_sum(summed, copies);
        // apply the Hadamard transform (summed is clobbered)
        hadamard(summed, spectrum);
        // fix the first entry to get the half Hadamard transform
        spectrum[0] -= 64 * MULTIPLICITY;
        // the peak of the spectrum gives the decoded byte
        msg[i] = find_peaks(spectrum);
    }
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-192/clean/reed_muller.h Целия файл

@@ -0,0 +1,18 @@
#ifndef REED_MULLER_H
#define REED_MULLER_H


/**
* @file reed_muller.h
* Header file of reed_muller.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_encode(uint8_t *cdw, const uint8_t *msg);

void PQCLEAN_HQCRMRS192_CLEAN_reed_muller_decode(uint8_t *msg, const uint8_t *cdw);


#endif

+ 349
- 0
src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.c Целия файл

@@ -0,0 +1,349 @@
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include "parsing.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
* @file reed_solomon.c
* Constant time implementation of Reed-Solomon codes
*/


static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
static void compute_roots(uint8_t *error, uint16_t *sigma);
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
static void correct_errors(uint8_t *cdw, const uint16_t *error_values);

/**
 * @brief Encodes a message of PARAM_K bytes to a Reed-Solomon codeword of PARAM_N1 bytes
 *
 * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
 * we perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
 * with feedback connections based on the generator polynomial of the Reed-Solomon code
 * (coefficients RS_POLY_COEFS). The first PARAM_N1 - PARAM_K bytes of the codeword hold the
 * register contents, the last PARAM_K bytes are a copy of the message.
 *
 * @param[out] cdw Byte array of (at least) PARAM_N1 bytes receiving the encoded message
 * @param[in] msg Byte array of PARAM_K bytes storing the message
 */
void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
    const uint16_t generator[] = {RS_POLY_COEFS};
    uint16_t scaled[PARAM_G] = {0};

    memset(cdw, 0, PARAM_N1);

    // message bytes enter the register from the last one to the first
    for (size_t step = 0; step < PARAM_K; ++step) {
        const uint8_t feedback = (uint8_t) (msg[PARAM_K - 1 - step] ^ cdw[PARAM_N1 - PARAM_K - 1]);

        for (size_t j = 0; j < PARAM_G; ++j) {
            scaled[j] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(feedback, generator[j]);
        }

        // shift the register by one stage while adding the scaled generator
        uint8_t carry = 0;
        for (size_t stage = 0; stage < PARAM_N1 - PARAM_K; stage++) {
            const uint8_t shifted_out = cdw[stage];
            cdw[stage] = (uint8_t) (carry ^ scaled[stage]);
            carry = shifted_out;
        }
    }

    // systematic part: the message itself
    memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
}



/**
 * @brief Computes 2 * PARAM_DELTA syndromes
 *
 * Syndrome i is cdw[0] plus the sum over j >= 1 of cdw[j] * alpha_ij_pow[i][j - 1],
 * all in the field implemented by gf_mul. The results are XOR-accumulated into
 * <b>syndromes</b>, so the caller must pass a zero-initialised array.
 *
 * @param[in,out] syndromes Array of size 2 * PARAM_DELTA, zero on entry, receiving the computed syndromes
 * @param[in] cdw Array of size PARAM_N1 storing the received vector
 */
static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
    for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) {
        for (size_t j = 1; j < PARAM_N1; ++j) {
            syndromes[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]);
        }
        // the j = 0 term has coefficient 1, so no multiplication is needed
        syndromes[i] ^= cdw[0];
    }
}



/**
* @brief Computes the error locator polynomial (ELP) sigma
*
* This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes)). <br>
* We use the letter p for rho which is initialized at -1. <br>
* The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
* Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
* sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
* We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
* This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
* and we only need to save its first PARAM_DELTA coefficients (indices 0 to PARAM_DELTA - 1).
*
* @returns the degree of the ELP sigma
* @param[out] sigma Array of size (at least) PARAM_DELTA + 1 receiving the ELP; sigma[0] is set to 1 and
*                   entries 1..PARAM_DELTA are XOR-accumulated, so they must be zero on entry
* @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
*/
static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
uint16_t deg_sigma = 0;
uint16_t deg_sigma_p = 0;
uint16_t deg_sigma_copy = 0;
uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
uint16_t pp = (uint16_t) -1; // 2*rho
uint16_t d_p = 1;
uint16_t d = syndromes[0];

uint16_t mask1, mask2, mask12;
uint16_t deg_X, deg_X_sigma_p;
uint16_t dd;
uint16_t mu;

uint16_t i;

sigma[0] = 1;
for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
// Save sigma in case we need it to update X_sigma_p
// (the byte count 2 * PARAM_DELTA covers PARAM_DELTA uint16_t coefficients)
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
deg_sigma_copy = deg_sigma;

// dd is the product of the current discrepancy d with gf_inverse(d_p)
dd = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(d, PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(d_p));

// sigma += dd * X_sigma_p, restricted to the coefficients that can be non-zero at this step
for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
sigma[i] ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(dd, X_sigma_p[i]);
}

deg_X = mu - pp;
deg_X_sigma_p = deg_X + deg_sigma_p;

// mask1 = 0xffff if(d != 0) and 0 otherwise
mask1 = -((uint16_t) - d >> 15);

// mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);

// mask12 = 0xffff if the deg_sigma increased and 0 otherwise
mask12 = mask1 & mask2;
deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);

// Last iteration: sigma and deg_sigma are final, the state for a next round is not needed
if (mu == (2 * PARAM_DELTA - 1)) {
break;
}

// When the degree increased, mu, d and the saved sigma become the new rho, d_p and sigma_p
pp ^= mask12 & (mu ^ pp);
d_p ^= mask12 & (d ^ d_p);
// Shift by one position (multiplication by X): take the saved sigma when the degree
// increased, the previous X_sigma_p otherwise. X_sigma_p[0] is never written and stays 0.
for (i = PARAM_DELTA; i; --i) {
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
}

deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
// Discrepancy for the next round: syndromes[mu + 1] plus the sum of sigma[i] * syndromes[mu + 1 - i]
d = syndromes[mu + 1];

for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
d ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[i], syndromes[mu + 1 - i]);
}
}

return deg_sigma;
}



/**
* @brief Computes the error polynomial error from the error locator polynomial sigma
*
* See function PQCLEAN_HQCRMRS192_CLEAN_fft for more details.
*
* @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
* @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
* @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
*/
static void compute_roots(uint8_t *error, uint16_t *sigma) {
uint16_t w[1 << PARAM_M] = {0};

PQCLEAN_HQCRMRS192_CLEAN_fft(w, sigma, PARAM_DELTA + 1);
PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(error, w);
}



/**
* @brief Computes the polynomial z(x)
*
* See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
* z[0] is 1 and, for 1 <= i <= degree, z[i] is sigma[i] + syndromes[i - 1] plus the sum over
* 1 <= j < i of sigma[j] * syndromes[i - j - 1]. Coefficients above <b>degree</b> are forced to 0
* with a mask instead of a branch. z[1] always receives syndromes[0], without a mask.
*
* @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
* @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
* @param[in] degree Integer that is the degree of polynomial sigma
* @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
*/
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
size_t i, j;
uint16_t mask;

z[0] = 1;

for (i = 1; i < PARAM_DELTA + 1; ++i) {
// mask = 0xffff if (i <= degree) and 0 otherwise: bit 15 of the 16-bit difference
// i - degree - 1 is set exactly when it is negative (for the small values used here)
mask = -((uint16_t) (i - degree - 1) >> 15);
z[i] = mask & sigma[i];
}

z[1] ^= syndromes[0];

for (i = 2; i <= PARAM_DELTA; ++i) {
// same mask as above: only coefficients up to the degree of sigma are accumulated
mask = -((uint16_t) (i - degree - 1) >> 15);
z[i] ^= mask & syndromes[i - 1];

for (j = 1; j < i; ++j) {
z[i] ^= mask & PQCLEAN_HQCRMRS192_CLEAN_gf_mul(sigma[j], syndromes[i - j - 1]);
}
}
}



/**
* @brief Computes the error values
*
* See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
* The function works in three phases: it gathers one beta value per non-zero entry of <b>error</b>,
* computes one error value e_j per gathered beta, then scatters the e_j back to the positions of the
* non-zero entries of <b>error</b>. All selections are done with masks rather than branches.
*
* @param[out] error_values Array of PARAM_N1 elements receiving the error values; the values are
*                          added to the existing contents, so the array must be zero on entry
* @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
* @param[in] error Array of (at least) PARAM_N1 elements, non-zero at the error positions
*/
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
uint16_t beta_j[PARAM_DELTA] = {0};
uint16_t e_j[PARAM_DELTA] = {0};

uint16_t delta_counter;
uint16_t delta_real_value;
uint16_t found;
uint16_t mask1;
uint16_t mask2;
uint16_t tmp1;
uint16_t tmp2;
uint16_t inverse;
uint16_t inverse_power_j;

// Compute the beta_{j_i} page 31 of the documentation
// beta_j[k] receives gf_exp[i] for the k-th position i where error[i] is non-zero
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; i++) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
beta_j[j] += mask1 & mask2 & gf_exp[i];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
// number of non-zero entries seen in error[0..PARAM_N1 - 1]
delta_real_value = delta_counter;

// Compute the e_{j_i} page 31 of the documentation
for (size_t i = 0; i < PARAM_DELTA; ++i) {
tmp1 = 1;
tmp2 = 1;
inverse = PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(beta_j[i]);
inverse_power_j = 1;

// tmp1 = 1 + sum over j of z[j] * inverse^j, i.e. z evaluated at inverse (z[0] is assumed to be 1)
for (size_t j = 1; j <= PARAM_DELTA; ++j) {
inverse_power_j = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, inverse);
tmp1 ^= PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse_power_j, z[j]);
}
// tmp2 = product over the other PARAM_DELTA - 1 betas of (1 + inverse * beta)
for (size_t k = 1; k < PARAM_DELTA; ++k) {
tmp2 = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
}
mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
e_j[i] = mask1 & PQCLEAN_HQCRMRS192_CLEAN_gf_mul(tmp1, PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(tmp2));
}

// Place the delta e_{j_i} values at the right coordinates of the output vector
// (same walk over error as in the first phase, so e_j[k] lands on the k-th non-zero position)
delta_counter = 0;
for (size_t i = 0; i < PARAM_N1; ++i) {
found = 0;
mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
for (size_t j = 0; j < PARAM_DELTA; j++) {
mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
error_values[i] += mask1 & mask2 & e_j[j];
found += mask1 & mask2 & 1;
}
delta_counter += found;
}
}



/**
 * @brief Correct the errors
 *
 * Adds (XORs) the error value of every position to the received vector, in place.
 *
 * @param[in,out] cdw Array of PARAM_N1 elements holding the received vector, corrected on return
 * @param[in] error_values Array of PARAM_N1 elements storing the error value of each position
 */
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
    for (size_t pos = 0; pos < PARAM_N1; ++pos) {
        cdw[pos] = (uint8_t) (cdw[pos] ^ error_values[pos]);
    }
}



/**
 * @brief Decodes the received word
 *
 * This function relies on six steps:
 * <ol>
 * <li> The first step is the computation of the 2*PARAM_DELTA syndromes.
 * <li> The second step is the computation of the error-locator polynomial sigma.
 * <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and taking their inverses.
 * <li> The fourth step is the computation of the polynomial z(x).
 * <li> The fifth step is the computation of the error values.
 * <li> The sixth step is the correction of the errors in the received polynomial.
 * </ol>
 * For a more complete picture on Reed-Solomon decoding, see Shu Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
 *
 * @param[out] msg Byte array receiving the PARAM_K decoded message bytes
 * @param[in,out] cdw Byte array of (at least) PARAM_N1 bytes storing the received word; corrected in place
 */
void PQCLEAN_HQCRMRS192_CLEAN_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
    // Every work buffer starts zeroed: the helpers accumulate into them.
    uint16_t synd[2 * PARAM_DELTA] = {0};
    uint16_t locator[1 << PARAM_FFT] = {0};
    uint8_t error_positions[1 << PARAM_M] = {0};
    uint16_t z_poly[PARAM_N1] = {0};
    uint16_t magnitudes[PARAM_N1] = {0};

    // Step 1: the 2*PARAM_DELTA syndromes
    compute_syndromes(synd, cdw);

    // Step 2: the error locator polynomial sigma.
    // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
    const uint16_t locator_degree = compute_elp(locator, synd);

    // Step 3: the error polynomial
    compute_roots(error_positions, locator);

    // Step 4: the polynomial z(x)
    compute_z_poly(z_poly, locator, locator_degree, synd);

    // Step 5: the error values
    compute_error_values(magnitudes, z_poly, error_positions);

    // Step 6: correct the received word in place
    correct_errors(cdw, magnitudes);

    // The message is the part of the corrected codeword that starts at offset PARAM_G - 1
    memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);
}

+ 20
- 0
src/kem/hqc/hqc-rmrs-192/clean/reed_solomon.h
Файловите разлики са ограничени, защото са твърде много
Целия файл


+ 176
- 0
src/kem/hqc/hqc-rmrs-192/clean/vector.c Целия файл

@@ -0,0 +1,176 @@
#include "nistseedexpander.h"
#include "parameters.h"
#include "parsing.h"
#include "randombytes.h"
#include "vector.h"
#include <stdint.h>
#include <string.h>
/**
* @file vector.c
* @brief Implementation of vectors sampling and some utilities for the HQC scheme
*/


/**
* @brief Generates a vector of a given Hamming weight
*
* This function generates uniformly at random a binary vector of a Hamming weight equal to the parameter <b>weight</b>. The vector
* is stored by position: v[0..weight-1] receive <b>weight</b> pairwise distinct indices in [0, PARAM_N - 1].
* To generate the vector we have to sample uniformly at random values in the interval [0, PARAM_N -1]. Suppose the PARAM_N is equal to \f$ 70853 \f$, to select a position \f$ r\f$ the function works as follow:
* 1. It makes a call to the seedexpander function to obtain a random number \f$ x\f$ in \f$ [0, 2^{24}[ \f$.
* 2. Let \f$ t = \lfloor {2^{24} \over 70853} \rfloor \times 70853\f$
* 3. If \f$ x \geq t\f$, go to 1
* 4. It returns \f$ r = x \mod 70853\f$
*
* The parameter \f$ t \f$ is precomputed and it's denoted by UTILS_REJECTION_THRESHOLD (see the file parameters.h).
* A position that was already drawn is discarded and drawn again.
*
* @param[in,out] ctx Pointer to the context of the seed expander
* @param[out] v Pointer to an array of (at least) <b>weight</b> entries receiving the positions
* @param[in] weight Integer that is the Hamming weight; must not exceed PARAM_OMEGA_R (size of the local buffer)
*/
void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight) {
size_t random_bytes_size = 3 * weight;
uint8_t rand_bytes[3 * PARAM_OMEGA_R] = {0}; // weight is expected to be <= PARAM_OMEGA_R
uint8_t inc;
size_t i, j;

i = 0;
// j == random_bytes_size marks the buffer as exhausted, forcing a refill on the first draw
j = random_bytes_size;
while (i < weight) {
do {
if (j == random_bytes_size) {
seedexpander(ctx, rand_bytes, random_bytes_size);
j = 0;
}

// assemble a 24-bit candidate, most significant byte first
v[i] = ((uint32_t) rand_bytes[j++]) << 16;
v[i] |= ((uint32_t) rand_bytes[j++]) << 8;
v[i] |= rand_bytes[j++];

} while (v[i] >= UTILS_REJECTION_THRESHOLD);

v[i] = v[i] % PARAM_N;

// keep the candidate only if it differs from every position accepted so far;
// otherwise slot i is overwritten by the next draw
inc = 1;
for (size_t k = 0; k < i; k++) {
if (v[k] == v[i]) {
inc = 0;
}
}
i += inc;
}
}



/**
 * @brief Generates a vector of a given Hamming weight
 *
 * This function draws <b>weight</b> distinct positions with
 * PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates (see that function for the
 * rejection sampling procedure) and sets the corresponding bits of <b>v</b>.
 * The bits are OR-ed into <b>v</b>: the caller must clear the vector beforehand to obtain a vector
 * of Hamming weight exactly <b>weight</b>.
 *
 * @param[in,out] ctx Pointer to the context of the seed expander
 * @param[in,out] v Pointer to the bit vector (64 bits per word) in which the bits are set
 * @param[in] weight Integer that is the Hamming weight; must not exceed PARAM_OMEGA_R
 */
void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight) {
    uint32_t positions[PARAM_OMEGA_R] = {0};

    PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(ctx, positions, weight);

    for (size_t i = 0; i < weight; ++i) {
        const uint32_t word = positions[i] >> 6;   // position / 64
        const uint32_t bit = positions[i] & 63;    // position % 64
        v[word] |= ((uint64_t) 1) << bit;
    }
}



/**
 * @brief Generates a random vector of dimension <b>PARAM_N</b>
 *
 * This function generates a random binary vector of dimension <b>PARAM_N</b>. It draws
 * VEC_N_SIZE_BYTES bytes from the seedexpander, loads them into 64-bit words and drops the
 * extra bits of the last word using RED_MASK.
 *
 * @param[in,out] ctx Pointer to the context of the seed expander
 * @param[out] v Pointer to an array of VEC_N_SIZE_64 words receiving the vector
 */
void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v) {
    uint8_t stream[VEC_N_SIZE_BYTES] = {0};

    seedexpander(ctx, stream, sizeof stream);
    PQCLEAN_HQCRMRS192_CLEAN_load8_arr(v, VEC_N_SIZE_64, stream, sizeof stream);

    // clear the bits of the last word that lie beyond the vector
    v[VEC_N_SIZE_64 - 1] &= RED_MASK;
}



/**
 * @brief Adds two vectors
 *
 * The addition is over GF(2): each output word is the XOR of the two input words.
 *
 * @param[out] o Pointer to an array that is the result
 * @param[in] v1 Pointer to an array that is the first vector
 * @param[in] v2 Pointer to an array that is the second vector
 * @param[in] size Integer that is the size of the vectors, in 64-bit words
 */
void PQCLEAN_HQCRMRS192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size) {
    for (uint32_t remaining = size; remaining > 0; --remaining) {
        *o++ = *v1++ ^ *v2++;
    }
}



/**
 * @brief Compares two vectors
 *
 * Every byte pair is examined regardless of earlier differences; the loop has no early exit.
 *
 * @param[in] v1 Pointer to an array that is first vector
 * @param[in] v2 Pointer to an array that is second vector
 * @param[in] size Integer that is the size of the vectors, in bytes
 * @returns 0 if the vectors are equal and 1 otherwise
 */
uint8_t PQCLEAN_HQCRMRS192_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size) {
    uint64_t diff = 0;

    // accumulate every differing bit
    for (size_t i = 0; i < size; i++) {
        diff |= (uint64_t) (v1[i] ^ v2[i]);
    }

    // the two's complement negation of a non-zero value below 2^63 has its top bit set
    return (uint8_t) ((0 - diff) >> 63);
}



/**
 * @brief Resize a vector so that it contains <b>size_o</b> bits
 *
 * When the output is at least as large as the input, the CEIL_DIVIDE(size_v, 64) words of the
 * input are copied unchanged. When the output is smaller, VEC_N1N2_SIZE_64 words are copied and
 * the bits above position size_o % 64 in word VEC_N1N2_SIZE_64 - 1 are cleared (nothing is cleared
 * when size_o is a multiple of 64).
 * NOTE(review): the shrinking branch uses VEC_N1N2_SIZE_64 rather than a word count derived from
 * size_o - presumably it is only ever called with size_o == PARAM_N1N2; confirm against callers.
 *
 * @param[out] o Pointer to the output vector
 * @param[in] size_o Integer that is the size of the output vector in bits
 * @param[in] v Pointer to the input vector
 * @param[in] size_v Integer that is the size of the input vector in bits
 */
void PQCLEAN_HQCRMRS192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v) {
    // growing (or same size): plain copy of the input words
    if (size_o >= size_v) {
        memcpy(o, v, 8 * CEIL_DIVIDE(size_v, 64));
        return;
    }

    // shrinking: copy, then truncate the last word to the bits that are in use
    memcpy(o, v, 8 * VEC_N1N2_SIZE_64);

    const uint32_t used_bits = size_o % 64;
    if (used_bits != 0) {
        o[VEC_N1N2_SIZE_64 - 1] &= UINT64_MAX >> (64 - used_bits);
    }
}

+ 27
- 0
src/kem/hqc/hqc-rmrs-192/clean/vector.h Целия файл

@@ -0,0 +1,27 @@
#ifndef VECTOR_H
#define VECTOR_H


/**
* @file vector.h
* @brief Header file for vector.c
*/
#include "nistseedexpander.h"
#include "randombytes.h"
#include <stdint.h>

void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight_by_coordinates(AES_XOF_struct *ctx, uint32_t *v, uint16_t weight);

void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random_fixed_weight(AES_XOF_struct *ctx, uint64_t *v, uint16_t weight);

void PQCLEAN_HQCRMRS192_CLEAN_vect_set_random(AES_XOF_struct *ctx, uint64_t *v);


void PQCLEAN_HQCRMRS192_CLEAN_vect_add(uint64_t *o, const uint64_t *v1, const uint64_t *v2, uint32_t size);

uint8_t PQCLEAN_HQCRMRS192_CLEAN_vect_compare(const uint8_t *v1, const uint8_t *v2, uint32_t size);

void PQCLEAN_HQCRMRS192_CLEAN_vect_resize(uint64_t *o, uint32_t size_o, const uint64_t *v, uint32_t size_v);


#endif

+ 16
- 0
src/kem/hqc/hqc-rmrs-256/avx2/CMakeLists.txt Целия файл

@@ -0,0 +1,16 @@
# Source files of the AVX2 implementation of HQC-RMRS-256.
set(
SRC_AVX2_HQCRMRS256
code.c
fft.c
gf2x.c
gf.c
hqc.c
kem.c
parsing.c
reed_muller.c
reed_solomon.c
vector.c
)

# NOTE(review): the namespace argument below is PQCLEAN_HQCRMRS256_CLEAN although this is the
# avx2 directory and its sources use the PQCLEAN_HQCRMRS256_AVX2_ prefix - confirm against the
# definition of define_kem_alg whether PQCLEAN_HQCRMRS256_AVX2 was intended.
define_kem_alg(hqcrmrs256_avx2
PQCLEAN_HQCRMRS256_CLEAN "${SRC_AVX2_HQCRMRS256}" "${CMAKE_CURRENT_SOURCE_DIR}")

+ 25
- 0
src/kem/hqc/hqc-rmrs-256/avx2/api.h Целия файл

@@ -0,0 +1,25 @@
#ifndef PQCLEAN_HQCRMRS256_AVX2_API_H
#define PQCLEAN_HQCRMRS256_AVX2_API_H
/**
* @file api.h
* @brief NIST KEM API used by the HQC_KEM IND-CCA2 scheme
*/

#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_ALGNAME "HQC-RMRS-256"

#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_SECRETKEYBYTES 7285
#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_PUBLICKEYBYTES 7245
#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_BYTES 64
#define PQCLEAN_HQCRMRS256_AVX2_CRYPTO_CIPHERTEXTBYTES 14469

// As a technicality, the public key is appended to the secret key in order to respect the NIST API.
// Without this constraint, PQCLEAN_HQCRMRS256_AVX2_CRYPTO_SECRETKEYBYTES would be defined as 40
// (7285 - 7245, i.e. the secret key without the appended public key).

int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk);

int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk);

int PQCLEAN_HQCRMRS256_AVX2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk);


#endif

+ 47
- 0
src/kem/hqc/hqc-rmrs-256/avx2/code.c Целия файл

@@ -0,0 +1,47 @@
#include "code.h"
#include "parameters.h"
#include "reed_muller.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <string.h>
/**
* @file code.c
* @brief Implementation of concatenated code
*/



/**
 * @brief Encoding the message m to a code word em using the concatenated code
 *
 * The message is first encoded with the Reed-Solomon code; the resulting codeword is then
 * encoded with the duplicated Reed-Muller code to obtain the concatenated code word.
 *
 * @param[out] em Pointer to an array that receives the concatenated code word
 * @param[in] m Pointer to an array that is the message
 */
void PQCLEAN_HQCRMRS256_AVX2_code_encode(uint8_t *em, const uint8_t *m) {
    // intermediate Reed-Solomon codeword (outer code)
    uint8_t rs_codeword[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(rs_codeword, m);
    PQCLEAN_HQCRMRS256_AVX2_reed_muller_encode(em, rs_codeword);
}



/**
 * @brief Decoding the code word em to a message m using the concatenated code
 *
 * The code word is first decoded with the Reed-Muller code; the result is then decoded with
 * the Reed-Solomon code to recover the message.
 *
 * @param[out] m Pointer to an array that receives the message
 * @param[in] em Pointer to an array that is the code word
 */
void PQCLEAN_HQCRMRS256_AVX2_code_decode(uint8_t *m, const uint8_t *em) {
    // output of the inner (Reed-Muller) decoder, input of the outer (Reed-Solomon) decoder
    uint8_t rs_received[8 * VEC_N1_SIZE_64] = {0};

    PQCLEAN_HQCRMRS256_AVX2_reed_muller_decode(rs_received, em);
    PQCLEAN_HQCRMRS256_AVX2_reed_solomon_decode(m, rs_received);
}

+ 18
- 0
src/kem/hqc/hqc-rmrs-256/avx2/code.h Целия файл

@@ -0,0 +1,18 @@
#ifndef CODE_H
#define CODE_H


/**
* @file code.h
* Header file of code.c
*/
#include "parameters.h"
#include <stddef.h>
#include <stdint.h>

void PQCLEAN_HQCRMRS256_AVX2_code_encode(uint8_t *em, const uint8_t *message);

void PQCLEAN_HQCRMRS256_AVX2_code_decode(uint8_t *m, const uint8_t *em);


#endif

Някои файлове не бяха показани, защото твърде много файлове са промени

Зареждане…
Отказ
Запис