Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.
 
 
 

214 рядків
10 KiB

  1. /// @file rainbow_keypair_computation.c
  2. /// @brief Implementations for functions in rainbow_keypair_computation.h
  3. ///
  4. #include "rainbow_keypair_computation.h"
  5. #include "blas.h"
  6. #include "blas_comm.h"
  7. #include "rainbow_blas.h"
  8. #include "rainbow_keypair.h"
  9. #include <stdint.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. void PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_extcpk_to_pk(pk_t *pk, const ext_cpk_t *cpk) {
  13. const unsigned char *idx_l1 = cpk->l1_Q1;
  14. const unsigned char *idx_l2 = cpk->l2_Q1;
  15. for (unsigned int i = 0; i < _V1; i++) {
  16. for (unsigned int j = i; j < _V1; j++) {
  17. unsigned int pub_idx = PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_idx_of_trimat(i, j, _PUB_N);
  18. memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE);
  19. memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE);
  20. idx_l1 += _O1_BYTE;
  21. idx_l2 += _O2_BYTE;
  22. }
  23. }
  24. idx_l1 = cpk->l1_Q2;
  25. idx_l2 = cpk->l2_Q2;
  26. for (unsigned int i = 0; i < _V1; i++) {
  27. for (unsigned int j = _V1; j < _V1 + _O1; j++) {
  28. unsigned int pub_idx = PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_idx_of_trimat(i, j, _PUB_N);
  29. memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE);
  30. memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE);
  31. idx_l1 += _O1_BYTE;
  32. idx_l2 += _O2_BYTE;
  33. }
  34. }
  35. idx_l1 = cpk->l1_Q3;
  36. idx_l2 = cpk->l2_Q3;
  37. for (unsigned int i = 0; i < _V1; i++) {
  38. for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) {
  39. unsigned int pub_idx = PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_idx_of_trimat(i, j, _PUB_N);
  40. memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE);
  41. memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE);
  42. idx_l1 += _O1_BYTE;
  43. idx_l2 += _O2_BYTE;
  44. }
  45. }
  46. idx_l1 = cpk->l1_Q5;
  47. idx_l2 = cpk->l2_Q5;
  48. for (unsigned int i = _V1; i < _V1 + _O1; i++) {
  49. for (unsigned int j = i; j < _V1 + _O1; j++) {
  50. unsigned int pub_idx = PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_idx_of_trimat(i, j, _PUB_N);
  51. memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE);
  52. memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE);
  53. idx_l1 += _O1_BYTE;
  54. idx_l2 += _O2_BYTE;
  55. }
  56. }
  57. idx_l1 = cpk->l1_Q6;
  58. idx_l2 = cpk->l2_Q6;
  59. for (unsigned int i = _V1; i < _V1 + _O1; i++) {
  60. for (unsigned int j = _V1 + _O1; j < _PUB_N; j++) {
  61. unsigned int pub_idx = PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_idx_of_trimat(i, j, _PUB_N);
  62. memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE);
  63. memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE);
  64. idx_l1 += _O1_BYTE;
  65. idx_l2 += _O2_BYTE;
  66. }
  67. }
  68. idx_l1 = cpk->l1_Q9;
  69. idx_l2 = cpk->l2_Q9;
  70. for (unsigned int i = _V1 + _O1; i < _PUB_N; i++) {
  71. for (unsigned int j = i; j < _PUB_N; j++) {
  72. unsigned int pub_idx = PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_idx_of_trimat(i, j, _PUB_N);
  73. memcpy(&pk->pk[_PUB_M_BYTE * pub_idx], idx_l1, _O1_BYTE);
  74. memcpy((&pk->pk[_PUB_M_BYTE * pub_idx]) + _O1_BYTE, idx_l2, _O2_BYTE);
  75. idx_l1 += _O1_BYTE;
  76. idx_l2 += _O2_BYTE;
  77. }
  78. }
  79. }
  80. static void calculate_F_from_Q_ref(sk_t *Fs, const sk_t *Qs, sk_t *Ts) {
  81. // Layer 1
  82. // F_sk.l1_F1s[i] = Q_pk.l1_F1s[i]
  83. memcpy(Fs->l1_F1, Qs->l1_F1, _O1_BYTE * N_TRIANGLE_TERMS(_V1));
  84. // F_sk.l1_F2s[i] = ( Q_pk.l1_F1s[i] + Q_pk.l1_F1s[i].transpose() ) * T_sk.t1 + Q_pk.l1_F2s[i]
  85. memcpy(Fs->l1_F2, Qs->l1_F2, _O1_BYTE * _V1 * _O1);
  86. batch_2trimat_madd(Fs->l1_F2, Qs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE);
  87. /*
  88. Layer 2
  89. computations:
  90. F_sk.l2_F1s[i] = Q_pk.l2_F1s[i]
  91. Q1_T1 = Q_pk.l2_F1s[i]*T_sk.t1
  92. F_sk.l2_F2s[i] = Q1_T1 + Q_pk.l2_F2s[i] + Q_pk.l2_F1s[i].transpose() * T_sk.t1
  93. F_sk.l2_F5s[i] = UT( t1_tr* ( Q1_T1 + Q_pk.l2_F2s[i] ) ) + Q_pk.l2_F5s[i]
  94. Q1_Q1T_T4 = (Q_pk.l2_F1s[i] + Q_pk.l2_F1s[i].transpose()) * t4
  95. #Q1_Q1T_T4 = Q1_Q1T * t4
  96. Q2_T3 = Q_pk.l2_F2s[i]*T_sk.t3
  97. F_sk.l2_F3s[i] = Q1_Q1T_T4 + Q2_T3 + Q_pk.l2_F3s[i]
  98. F_sk.l2_F6s[i] = t1_tr * ( Q1_Q1T_T4 + Q2_T3 + Q_pk.l2_F3s[i] )
  99. + Q_pk.l2_F2s[i].transpose() * t4
  100. + (Q_pk.l2_F5s[i] + Q_pk.l2_F5s[i].transpose())*T_sk.t3 + Q_pk.l2_F6s[i]
  101. */
  102. memcpy(Fs->l2_F1, Qs->l2_F1, _O2_BYTE * N_TRIANGLE_TERMS(_V1)); // F_sk.l2_F1s[i] = Q_pk.l2_F1s[i]
  103. // F_sk.l2_F2s[i] = Q1_T1 + Q_pk.l2_F2s[i] + Q_pk.l2_F1s[i].transpose() * T_sk.t1
  104. // F_sk.l2_F5s[i] = UT( t1_tr* ( Q1_T1 + Q_pk.l2_F2s[i] ) ) + Q_pk.l2_F5s[i]
  105. memcpy(Fs->l2_F2, Qs->l2_F2, _O2_BYTE * _V1 * _O1);
  106. batch_trimat_madd(Fs->l2_F2, Qs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // Q1_T1+ Q2
  107. unsigned char tempQ[_O1 * _O1 * _O2_BYTE + 32];
  108. memset(tempQ, 0, _O1 * _O1 * _O2_BYTE);
  109. batch_matTr_madd(tempQ, Ts->t1, _V1, _V1_BYTE, _O1, Fs->l2_F2, _O1, _O2_BYTE); // t1_tr*(Q1_T1+Q2)
  110. memcpy(Fs->l2_F5, Qs->l2_F5, _O2_BYTE * N_TRIANGLE_TERMS(_O1)); // F5
  111. PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_UpperTrianglize(Fs->l2_F5, tempQ, _O1, _O2_BYTE); // UT( ... )
  112. batch_trimatTr_madd(Fs->l2_F2, Qs->l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O2_BYTE); // F2 = Q1_T1 + Q2 + Q1^tr*t1
  113. // Q1_Q1T_T4 = (Q_pk.l2_F1s[i] + Q_pk.l2_F1s[i].transpose()) * t4
  114. // Q2_T3 = Q_pk.l2_F2s[i]*T_sk.t3
  115. // F_sk.l2_F3s[i] = Q1_Q1T_T4 + Q2_T3 + Q_pk.l2_F3s[i]
  116. memcpy(Fs->l2_F3, Qs->l2_F3, _V1 * _O2 * _O2_BYTE);
  117. batch_2trimat_madd(Fs->l2_F3, Qs->l2_F1, Ts->t4, _V1, _V1_BYTE, _O2, _O2_BYTE); // Q1_Q1T_T4
  118. batch_mat_madd(Fs->l2_F3, Qs->l2_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // Q2_T3
  119. // F_sk.l2_F6s[i] = t1_tr * ( Q1_Q1T_T4 + Q2_T3 + Q_pk.l2_F3s[i] )
  120. // + Q_pk.l2_F2s[i].transpose() * t4
  121. // + (Q_pk.l2_F5s[i] + Q_pk.l2_F5s[i].transpose())*T_sk.t3 + Q_pk.l2_F6s[i]
  122. memcpy(Fs->l2_F6, Qs->l2_F6, _O1 * _O2 * _O2_BYTE);
  123. batch_matTr_madd(Fs->l2_F6, Ts->t1, _V1, _V1_BYTE, _O1, Fs->l2_F3, _O2, _O2_BYTE); // t1_tr * ( Q1_Q1T_T4 + Q2_T3 + Q_pk.l2_F3s[i] )
  124. batch_2trimat_madd(Fs->l2_F6, Qs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // (Q_pk.l2_F5s[i] + Q_pk.l2_F5s[i].transpose())*T_sk.t3
  125. batch_bmatTr_madd(Fs->l2_F6, Qs->l2_F2, _O1, Ts->t4, _V1, _V1_BYTE, _O2, _O2_BYTE);
  126. }
  127. static void calculate_Q_from_F_cyclic_ref(cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) {
  128. // Layer 1: Computing Q5, Q3, Q6, Q9
  129. // Q_pk.l1_F5s[i] = UT( T1tr* (F1 * T1 + F2) )
  130. const unsigned char *t2 = Ts->t4;
  131. sk_t tempQ;
  132. memcpy(tempQ.l1_F2, Fs->l1_F2, _O1_BYTE * _V1 * _O1);
  133. batch_trimat_madd(tempQ.l1_F2, Fs->l1_F1, Ts->t1, _V1, _V1_BYTE, _O1, _O1_BYTE); // F1*T1 + F2
  134. memset(tempQ.l2_F1, 0, sizeof(tempQ.l2_F1));
  135. memset(tempQ.l2_F2, 0, sizeof(tempQ.l2_F2));
  136. batch_matTr_madd(tempQ.l2_F1, Ts->t1, _V1, _V1_BYTE, _O1, tempQ.l1_F2, _O1, _O1_BYTE); // T1tr*(F1*T1 + F2)
  137. memset(Qs->l1_Q5, 0, _O1_BYTE * N_TRIANGLE_TERMS(_O1));
  138. PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_UpperTrianglize(Qs->l1_Q5, tempQ.l2_F1, _O1, _O1_BYTE); // UT( ... ) // Q5
  139. /*
  140. F1_T2 = F1 * t2
  141. F2_T3 = F2 * t3
  142. F1_F1T_T2 + F2_T3 = F1_T2 + F2_T3 + F1tr * t2
  143. Q_pk.l1_F3s[i] = F1_F1T_T2 + F2_T3
  144. Q_pk.l1_F6s[i] = T1tr* ( F1_F1T_T2 + F2_T3 ) + F2tr * t2
  145. Q_pk.l1_F9s[i] = UT( T2tr* ( F1_T2 + F2_T3 ) )
  146. */
  147. memset(Qs->l1_Q3, 0, _O1_BYTE * _V1 * _O2);
  148. memset(Qs->l1_Q6, 0, _O1_BYTE * _O1 * _O2);
  149. memset(Qs->l1_Q9, 0, _O1_BYTE * N_TRIANGLE_TERMS(_O2));
  150. batch_trimat_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1*T2
  151. batch_mat_madd(Qs->l1_Q3, Fs->l1_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O1_BYTE); // F1_T2 + F2_T3
  152. memset(tempQ.l1_F2, 0, _O1_BYTE * _V1 * _O2); // should be F3. assuming: _O1 >= _O2
  153. batch_matTr_madd(tempQ.l1_F2, t2, _V1, _V1_BYTE, _O2, Qs->l1_Q3, _O2, _O1_BYTE); // T2tr * ( F1_T2 + F2_T3 )
  154. PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_UpperTrianglize(Qs->l1_Q9, tempQ.l1_F2, _O2, _O1_BYTE); // Q9
  155. batch_trimatTr_madd(Qs->l1_Q3, Fs->l1_F1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F1_F1T_T2 + F2_T3 // Q3
  156. batch_bmatTr_madd(Qs->l1_Q6, Fs->l1_F2, _O1, t2, _V1, _V1_BYTE, _O2, _O1_BYTE); // F2tr*T2
  157. batch_matTr_madd(Qs->l1_Q6, Ts->t1, _V1, _V1_BYTE, _O1, Qs->l1_Q3, _O2, _O1_BYTE); // Q6
  158. /*
  159. Layer 2
  160. Computing Q9:
  161. F1_T2 = F1 * t2
  162. F2_T3 = F2 * t3
  163. Q9 = UT( T2tr*( F1*T2 + F2*T3 + F3 ) + T3tr*( F5*T3 + F6 ) )
  164. */
  165. sk_t tempQ2;
  166. memcpy(tempQ2.l2_F3, Fs->l2_F3, _O2_BYTE * _V1 * _O2); /// F3 actually.
  167. batch_trimat_madd(tempQ2.l2_F3, Fs->l2_F1, t2, _V1, _V1_BYTE, _O2, _O2_BYTE); // F1*T2 + F3
  168. batch_mat_madd(tempQ2.l2_F3, Fs->l2_F2, _V1, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F1_T2 + F2_T3 + F3
  169. memset(tempQ.l2_F3, 0, _O2_BYTE * _V1 * _O2);
  170. batch_matTr_madd(tempQ.l2_F3, t2, _V1, _V1_BYTE, _O2, tempQ2.l2_F3, _O2, _O2_BYTE); // T2tr * ( ..... )
  171. memcpy(tempQ.l2_F6, Fs->l2_F6, _O2_BYTE * _O1 * _O2);
  172. batch_trimat_madd(tempQ.l2_F6, Fs->l2_F5, Ts->t3, _O1, _O1_BYTE, _O2, _O2_BYTE); // F5*T3 + F6
  173. batch_matTr_madd(tempQ.l2_F3, Ts->t3, _O1, _O1_BYTE, _O2, tempQ.l2_F6, _O2, _O2_BYTE); // T2tr*( ..... ) + T3tr*( ..... )
  174. memset(Qs->l2_Q9, 0, _O2_BYTE * N_TRIANGLE_TERMS(_O2));
  175. PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_UpperTrianglize(Qs->l2_Q9, tempQ.l2_F3, _O2, _O2_BYTE); // Q9
  176. }
  // Implementation selection for the two public entry points. The *_impl names are
  // bound unconditionally to the portable reference routines; no conditional on
  // _BLAS_SSE_ or _BLAS_AVX2_ accompanies these definitions.
  #define calculate_F_from_Q_impl calculate_F_from_Q_ref
  #define calculate_Q_from_F_cyclic_impl calculate_Q_from_F_cyclic_ref
  180. void PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_calculate_F_from_Q(sk_t *Fs, const sk_t *Qs, sk_t *Ts) {
  181. calculate_F_from_Q_impl(Fs, Qs, Ts);
  182. }
  183. void PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_calculate_Q_from_F_cyclic(cpk_t *Qs, const sk_t *Fs, const sk_t *Ts) {
  184. calculate_Q_from_F_cyclic_impl(Qs, Fs, Ts);
  185. }