You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

126 lines
5.6 KiB

  1. /********************************************************************************************
  2. * FrodoKEM: Learning with Errors Key Encapsulation
  3. *
  4. * Abstract: matrix arithmetic functions used by the KEM
  5. *********************************************************************************************/
  6. #include <stdint.h>
  7. #include <string.h>
  8. #include "aes.h"
  9. #include "api.h"
  10. #include "common.h"
  11. #include "params.h"
  12. int PQCLEAN_FRODOKEM976AES_OPT_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
  13. // Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right.
  14. // Inputs: s, e (N x N_BAR)
  15. // Output: out = A*s + e (N x N_BAR)
  16. int k;
  17. uint16_t i, j;
  18. int16_t a_row[4 * PARAMS_N];
  19. for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
  20. *((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
  21. }
  22. int16_t a_row_temp[4 * PARAMS_N] = {0}; // Take four lines of A at once
  23. aes128ctx ctx128;
  24. aes128_keyexp(&ctx128, seed_A);
  25. for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) {
  26. a_row_temp[j + 1 + 0 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j); // Loading values in the little-endian order
  27. a_row_temp[j + 1 + 1 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j);
  28. a_row_temp[j + 1 + 2 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j);
  29. a_row_temp[j + 1 + 3 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(j);
  30. }
  31. for (i = 0; i < PARAMS_N; i += 4) {
  32. for (j = 0; j < PARAMS_N; j += PARAMS_STRIPE_STEP) { // Go through A, four rows at a time
  33. a_row_temp[j + 0 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 0); // Loading values in the little-endian order
  34. a_row_temp[j + 1 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 1);
  35. a_row_temp[j + 2 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 2);
  36. a_row_temp[j + 3 * PARAMS_N] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i + 3);
  37. }
  38. aes128_ecb((uint8_t *)a_row, (uint8_t *)a_row_temp, 4 * PARAMS_N * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);
  39. for (k = 0; k < 4 * PARAMS_N; k++) {
  40. a_row[k] = PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(a_row[k]);
  41. }
  42. for (k = 0; k < PARAMS_NBAR; k++) {
  43. uint16_t sum[4] = {0};
  44. for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication
  45. uint16_t sp = s[k * PARAMS_N + j];
  46. sum[0] += a_row[0 * PARAMS_N + j] * sp; // Go through four lines with same s
  47. sum[1] += a_row[1 * PARAMS_N + j] * sp;
  48. sum[2] += a_row[2 * PARAMS_N + j] * sp;
  49. sum[3] += a_row[3 * PARAMS_N + j] * sp;
  50. }
  51. out[(i + 0)*PARAMS_NBAR + k] += sum[0];
  52. out[(i + 2)*PARAMS_NBAR + k] += sum[2];
  53. out[(i + 1)*PARAMS_NBAR + k] += sum[1];
  54. out[(i + 3)*PARAMS_NBAR + k] += sum[3];
  55. }
  56. }
  57. return 1;
  58. }
  59. int PQCLEAN_FRODOKEM976AES_OPT_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) {
  60. // Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left.
  61. // Inputs: s', e' (N_BAR x N)
  62. // Output: out = s'*A + e' (N_BAR x N)
  63. int j;
  64. uint16_t i, kk;
  65. for (i = 0; i < (PARAMS_N * PARAMS_NBAR); i += 2) {
  66. *((uint32_t *)&out[i]) = *((uint32_t *)&e[i]);
  67. }
  68. int k;
  69. uint16_t a_cols[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
  70. uint16_t a_cols_t[PARAMS_N * PARAMS_STRIPE_STEP];
  71. uint16_t a_cols_temp[PARAMS_N * PARAMS_STRIPE_STEP] = {0};
  72. aes128ctx ctx128;
  73. aes128_keyexp(&ctx128, seed_A);
  74. for (i = 0, j = 0; i < PARAMS_N; i++, j += PARAMS_STRIPE_STEP) {
  75. a_cols_temp[j] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(i); // Loading values in the little-endian order
  76. }
  77. for (kk = 0; kk < PARAMS_N; kk += PARAMS_STRIPE_STEP) { // Go through A's columns, 8 (== PARAMS_STRIPE_STEP) columns at a time.
  78. for (i = 0; i < (PARAMS_N * PARAMS_STRIPE_STEP); i += PARAMS_STRIPE_STEP) {
  79. a_cols_temp[i + 1] = PQCLEAN_FRODOKEM976AES_OPT_UINT16_TO_LE(kk); // Loading values in the little-endian order
  80. }
  81. aes128_ecb((uint8_t *)a_cols, (uint8_t *)a_cols_temp, PARAMS_N * PARAMS_STRIPE_STEP * sizeof(int16_t) / AES_BLOCKBYTES, &ctx128);
  82. for (i = 0; i < PARAMS_N; i++) { // Transpose a_cols to have access to it in the column-major order.
  83. for (k = 0; k < PARAMS_STRIPE_STEP; k++) {
  84. a_cols_t[k * PARAMS_N + i] = PQCLEAN_FRODOKEM976AES_OPT_LE_TO_UINT16(a_cols[i * PARAMS_STRIPE_STEP + k]);
  85. }
  86. }
  87. for (i = 0; i < PARAMS_NBAR; i++) {
  88. for (k = 0; k < PARAMS_STRIPE_STEP; k += PARAMS_PARALLEL) {
  89. uint16_t sum[PARAMS_PARALLEL] = {0};
  90. for (j = 0; j < PARAMS_N; j++) { // Matrix-vector multiplication
  91. uint16_t sp = s[i * PARAMS_N + j];
  92. sum[0] += sp * a_cols_t[(k + 0) * PARAMS_N + j];
  93. sum[1] += sp * a_cols_t[(k + 1) * PARAMS_N + j];
  94. sum[2] += sp * a_cols_t[(k + 2) * PARAMS_N + j];
  95. sum[3] += sp * a_cols_t[(k + 3) * PARAMS_N + j];
  96. }
  97. out[i * PARAMS_N + kk + k + 0] += sum[0];
  98. out[i * PARAMS_N + kk + k + 2] += sum[2];
  99. out[i * PARAMS_N + kk + k + 1] += sum[1];
  100. out[i * PARAMS_N + kk + k + 3] += sum[3];
  101. }
  102. }
  103. }
  104. return 1;
  105. }