You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

253 lines
8.2 KiB

  1. #include "gf2x_arith.h"
  2. #include <string.h> // memset(...)
  3. void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add(DIGIT Res[], const DIGIT A[], const DIGIT B[], size_t n) {
  4. for (size_t i = 0; i < n; i++) {
  5. Res[i] = A[i] ^ B[i];
  6. }
  7. }
  8. /* copies len digits from a to r if b == 1 */
  9. void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_cmov(DIGIT *r, const DIGIT *a, size_t len, int c) {
  10. DIGIT mask = (DIGIT)(-c);
  11. for (size_t i = 0; i < len; i++) {
  12. r[i] ^= mask & (a[i] ^ r[i]);
  13. }
  14. }
  15. /* PRE: MAX ALLOWED ROTATION AMOUNT : DIGIT_SIZE_b */
  16. void PQCLEAN_LEDAKEMLT12_LEAKTIME_right_bit_shift_n(size_t length, DIGIT in[], size_t amount) {
  17. if ( amount == 0 ) {
  18. return;
  19. }
  20. size_t j;
  21. DIGIT mask;
  22. mask = ((DIGIT)0x01 << amount) - 1;
  23. for (j = length - 1; j > 0; j--) {
  24. in[j] >>= amount;
  25. in[j] |= (in[j - 1] & mask) << (DIGIT_SIZE_b - amount);
  26. }
  27. in[j] >>= amount;
  28. }
  29. /* PRE: MAX ALLOWED ROTATION AMOUNT : DIGIT_SIZE_b */
  30. void PQCLEAN_LEDAKEMLT12_LEAKTIME_left_bit_shift_n(size_t length, DIGIT in[], size_t amount) {
  31. if ( amount == 0 ) {
  32. return;
  33. }
  34. size_t j;
  35. DIGIT mask;
  36. mask = ~(((DIGIT)0x01 << (DIGIT_SIZE_b - amount)) - 1);
  37. for (j = 0 ; j < length - 1; j++) {
  38. in[j] <<= amount;
  39. in[j] |= (in[j + 1] & mask) >> (DIGIT_SIZE_b - amount);
  40. }
  41. in[j] <<= amount;
  42. }
  43. static void gf2x_mul1(DIGIT *R, const DIGIT A, const DIGIT B) {
  44. DIGIT tmp;
  45. R[0] = 0;
  46. R[1] = (A & 1) * B;
  47. for (uint8_t i = 1; i < DIGIT_SIZE_b; i++) {
  48. tmp = ((A >> i) & 1) * B;
  49. R[1] ^= tmp << i;
  50. R[0] ^= tmp >> (DIGIT_SIZE_b - i);
  51. }
  52. }
  53. static void gf2x_mul_n(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n) {
  54. DIGIT tmp[2];
  55. memset(R, 0x00, 2 * n * sizeof(DIGIT));
  56. for (size_t i = 0; i < n; i++) {
  57. for (size_t j = 0; j < n; j++) {
  58. gf2x_mul1(tmp, A[i], B[j]);
  59. R[i + j] ^= tmp[0];
  60. R[i + j + 1] ^= tmp[1];
  61. }
  62. }
  63. }
  64. static void gf2x_cpy(DIGIT *R, const DIGIT *A, size_t len) {
  65. for (size_t i = 0; i < len; i++) {
  66. R[i] = A[i];
  67. }
  68. }
  /* Short local names for polynomial addition:
   *   gf2x_add(R, A, B, n)  R = A + B over n digits
   *   gf2x_acc(R, B, n)     R += B over n digits (accumulate) */
  #define gf2x_add(R, A, B, n) PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add((R), (A), (B), (n))
  #define gf2x_acc(R, B, n) gf2x_add((R), (R), (B), (n))
  72. /* allows the operands to be of different size
  73. * first operand must be the bigger one.
  74. * aligns last array elements */
  75. static inline void gf2x_add_asymm(DIGIT *R,
  76. size_t na, const DIGIT *A,
  77. size_t nb, const DIGIT *B) {
  78. size_t delta = na - nb;
  79. gf2x_cpy(R, A, delta);
  80. gf2x_add(R + delta, A + delta, B, nb);;
  81. }
  82. /* aligns first array elements */
  83. static inline void gf2x_add_asymm2(DIGIT *R,
  84. size_t na, const DIGIT *A,
  85. size_t nb, const DIGIT *B) {
  86. size_t delta = na - nb;
  87. gf2x_add(R, A, B, nb);
  88. gf2x_cpy(R + nb, A + nb, delta);
  89. }
  /* Karatsuba with lowered space complexity
   * T(n) = 3 * ceil(n/2) + T(ceil(n / 2))
   *
   * Multiplies the n-digit polynomials A and B into R, recursing on halves of
   * ceil(n / 2) digits and falling back to gf2x_mul_n when n < MIN_KAR_DIGITS.
   *
   * R     - output; the base case clears and fills 2 * n digits.
   * A, B  - operands of n digits each; the half starting at index 0 is the one
   *         labelled "high" (a1 / b1) below.
   * n     - number of digits in each operand.
   * stack - scratch space; this level takes 3 * l digits (aa, bb, cc) and
   *         hands the remainder to the recursive calls. The size of the
   *         buffer is not checked here.
   *
   * The three sub-products are built directly inside R, so the order of the
   * statements below matters: cc must be saved before M overwrites c3/c2. */
  static void gf2x_mul_kar(DIGIT *R,
                           const DIGIT *A,
                           const DIGIT *B,
                           size_t n,
                           DIGIT *stack) {
      if (n < MIN_KAR_DIGITS) {
          gf2x_mul_n(R, A, B, n);
          return;
      }
      size_t l = (n + 1) / 2; // limb size = ceil(n / 2)
      size_t d = n & 1; // 1 when n is odd: the high halves are one digit shorter
      const DIGIT *a1 = A; // high half, length l - d
      const DIGIT *a0 = A + l - d; // low half, length l
      const DIGIT *b1 = B;
      const DIGIT *b0 = B + l - d;
      DIGIT *aa = stack;
      DIGIT *bb = aa + l;
      DIGIT *cc = bb + l;
      stack = cc + l; // 3l space requirement at each level
      DIGIT *c3 = R + l - 2 * d; // l-digit windows into R
      DIGIT *c2 = c3 + l; // c2 == R + 2 * (l - d): start of the low product
      DIGIT *c1 = c2 + l;
      gf2x_mul_kar(c2, a0, b0, l, stack); // L in low part of R
      gf2x_mul_kar(R, a1, b1, l - d, stack); // H in higher part of R
      gf2x_add_asymm(aa, l, a0, l - d, a1); // AH + AL
      gf2x_add_asymm(bb, l, b0, l - d, b1); // BH + BL
      gf2x_add(cc, c3, c2, l); // HL + LH in cc
      gf2x_mul_kar(c3, aa, bb, l, stack); // M = (AH + AL) x (BH + BL)
      gf2x_add_asymm(c3, l, c3, l - 2 * d, R); // add HH
      gf2x_acc(c2, c1, l); // add LL
      gf2x_acc(c3, cc, l); // add HL + LH
      gf2x_acc(c2, cc, l); // add HL + LH
  }
  125. static void gf2x_div_w_plus_one(DIGIT *A, size_t n) {
  126. size_t i;
  127. for (i = 0; i < n - 2; i++) {
  128. A[i + 1] ^= A[i]; // runs n - 2 times
  129. }
  130. }
  131. static void gf2x_shift_left_w(DIGIT *A, size_t n) {
  132. size_t i;
  133. for (i = 0; i < n - 1; i++) {
  134. A[i] = A[i + 1];
  135. }
  136. A[i] = 0;
  137. }
  /* Word-aligned Toom-Cook 3, source:
   * Brent, Richard P., et al. "Faster multiplication in GF (2)[x]."
   * International Algorithmic Number Theory Symposium.
   * Springer, Berlin, Heidelberg, 2008.
   *
   * Multiplies the n-digit polynomials A and B into R. Operands are split
   * into three parts (two of l = ceil(n / 3) digits and one of r = n - 2l
   * digits); five sub-products are computed recursively and then combined.
   * Falls back to gf2x_mul_kar when n < MIN_TOOM_DIGITS.
   *
   * R     - output; also used as workspace during evaluation (c0 = R,
   *         c4 = R + 4l), so its previous content is not preserved.
   * A, B  - operands of n digits each.
   * n     - number of digits in each operand.
   * stack - scratch space; this level takes 3 * x + z digits (c1, c2, c3, c5)
   *         and hands the remainder to the recursive calls. The size of the
   *         buffer is not checked here.
   *
   * NOTE(review): the statements below reuse buffers in place, so their
   * order is significant; do not reorder without re-deriving the scheme. */
  static void gf2x_mul_tc3w(DIGIT *R,
                            const DIGIT *A,
                            const DIGIT *B,
                            size_t n,
                            DIGIT *stack) {
      if (n < MIN_TOOM_DIGITS) {
          gf2x_mul_kar(R, A, B, n, stack);
          return;
      }
      size_t l = (n + 2) / 3; // size of a0, a1, b0, b1
      size_t r = n - 2 * l; // remaining sizes (a2, b2)
      size_t x = 2 * l + 4; // size of c1, c2, c3, c4
      size_t z = r + 2 > l + 1 ? r + 2 : l + 1; // size of c5
      const DIGIT *a0 = A;
      const DIGIT *a1 = A + l;
      const DIGIT *a2 = A + 2 * l;
      const DIGIT *b0 = B;
      const DIGIT *b1 = B + l;
      const DIGIT *b2 = B + 2 * l;
      DIGIT *c0 = R; // c0 and c4 in the result
      DIGIT *c4 = R + 4 * l;
      DIGIT *c1 = stack; // the rest in the stack
      DIGIT *c2 = c1 + x;
      DIGIT *c3 = c2 + x;
      DIGIT *c5 = c3 + x;
      stack = c5 + z; // Worst-case 7l + 14
      // Evaluation
      c0[0] = 0; // c0[z] = a1*W + a2*W^2
      c0[l + 1] = 0;
      gf2x_cpy(c0 + 1, a1, l);
      gf2x_acc(c0 + 2, a2, r);
      c4[0] = 0; // c4[z] = b1*W + b2*W^2
      c4[l + 1] = 0;
      gf2x_cpy(c4 + 1, b1, l);
      gf2x_acc(c4 + 2, b2, r);
      gf2x_cpy(c5, a0, l); // c5[l] = a0 + a1 + a2
      gf2x_acc(c5, a1, l);
      gf2x_acc(c5, a2, r);
      gf2x_cpy(c2, b0, l); // c2[l] = b0 + b1 + b2
      gf2x_acc(c2, b1, l);
      gf2x_acc(c2, b2, r);
      gf2x_mul_tc3w(c1, c2, c5, l, stack); // c1[2l] = c2 * c5
      gf2x_add_asymm2(c5, z, c0, l, c5); // c5[z] += c0, z >= l
      gf2x_add_asymm2(c2, z, c4, l, c2); // c2[z] += c4, idem
      gf2x_acc(c0, a0, l); // c0[l] += a0
      gf2x_acc(c4, b0, l); // c4[l] += b0
      gf2x_mul_tc3w(c3, c2, c5, z, stack); // c3[2z] = c2 * c5
      gf2x_mul_tc3w(c2, c0, c4, z, stack); // c2[2z] = c0 * c4
      gf2x_mul_tc3w(c0, a0, b0, l, stack); // c0[2l] = a0 * b0
      gf2x_mul_tc3w(c4, a2, b2, r, stack); // c4[2r] = a2 * b2
      // Interpolation
      gf2x_acc(c3, c2, 2 * z); // c3[2z] += c2
      gf2x_acc(c2, c0, 2 * l); // c2[2z] += c0
      gf2x_shift_left_w(c2, 2 * z); // c2[2z] = c2/y + c3
      gf2x_acc(c2, c3, 2 * z);
      gf2x_acc(c2, c4, 2 * r); // c2[2z] += c4 + c4**3
      gf2x_acc(c2 + 3, c4, 2 * r); // same c4 digits, added again three digits further on
      gf2x_div_w_plus_one(c2, 2 * z); // c2[2z-1] = c2/(W+1)
      gf2x_acc(c1, c0, 2 * l); // c1[2l] += c0
      gf2x_acc(c3, c1, 2 * l); // c3[2z] += c1
      gf2x_shift_left_w(c3, 2 * z); // c3[2z-2] = c3/(W^2 + W)
      gf2x_div_w_plus_one(c3, 2 * z - 1);
      gf2x_add_asymm2(c1, 2 * z, c2, 2 * l, c1); // c1[2z-1] += c2 + c4
      gf2x_acc(c1, c4, 2 * r); // size c2 >= c1 >= c4
      gf2x_acc(c2, c3, 2 * z - 1); // c2[2z-1] += c3
      // Recombination
      gf2x_cpy(R + 2 * l, c2, 2 * l); // c0 and c4 are already in place in R
      gf2x_acc(R + l, c1, 2 * z - 1);
      gf2x_acc(R + 3 * l, c3, 2 * z - 1);
  }
  212. void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n) {
  213. DIGIT stack[STACK_WORDS];
  214. gf2x_mul_tc3w(R, A, B, n, stack);
  215. }