Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
10 роки тому
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  /* Copyright (c) 2014, Google Inc.
   *
   * Permission to use, copy, modify, and/or distribute this software for any
   * purpose with or without fee is hereby granted, provided that the above
   * copyright notice and this permission notice appear in all copies.
   *
   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
   * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
   * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
   * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
  /* This implementation was taken from the public domain, neon2 version in
   * SUPERCOP by D. J. Bernstein and Peter Schwabe. */
  16. #include <openssl/poly1305.h>
  17. #if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM)
  18. #include <string.h>
  /* fe1305x2 holds two interleaved field elements: the code in this file uses
   * the even indices v[0], v[2], ..., v[8] for one element and the odd indices
   * v[1], v[3], ..., v[9] for the other. Only ten of the twelve words are used;
   * the remaining two pad the structure to 48 bytes, a multiple of 16. */
  typedef struct {
    uint32_t v[12]; /* for alignment; only using 10 */
  } fe1305x2;

  /* Map the short names used in this file onto the prefixed external symbols. */
  #define addmulmod openssl_poly1305_neon2_addmulmod
  #define blocks openssl_poly1305_neon2_blocks

  /* Defined outside this file (presumably in the NEON assembly, since this code
   * is only built when OPENSSL_NO_ASM is not defined -- confirm against the
   * build). Callers in this file pass a product pair |x|, |y| and an addend
   * |c|. */
  void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y,
                 const fe1305x2 *c);

  /* Defined outside this file. The caller in this file subtracts the return
   * value from |inlen| to obtain the number of bytes consumed. */
  int blocks(fe1305x2 *h, const fe1305x2 *precomp, const uint8_t *in,
             unsigned int inlen);
  28. static void freeze(fe1305x2 *r) {
  29. int i;
  30. uint32_t x0 = r->v[0];
  31. uint32_t x1 = r->v[2];
  32. uint32_t x2 = r->v[4];
  33. uint32_t x3 = r->v[6];
  34. uint32_t x4 = r->v[8];
  35. uint32_t y0;
  36. uint32_t y1;
  37. uint32_t y2;
  38. uint32_t y3;
  39. uint32_t y4;
  40. uint32_t swap;
  41. for (i = 0; i < 3; ++i) {
  42. x1 += x0 >> 26;
  43. x0 &= 0x3ffffff;
  44. x2 += x1 >> 26;
  45. x1 &= 0x3ffffff;
  46. x3 += x2 >> 26;
  47. x2 &= 0x3ffffff;
  48. x4 += x3 >> 26;
  49. x3 &= 0x3ffffff;
  50. x0 += 5 * (x4 >> 26);
  51. x4 &= 0x3ffffff;
  52. }
  53. y0 = x0 + 5;
  54. y1 = x1 + (y0 >> 26);
  55. y0 &= 0x3ffffff;
  56. y2 = x2 + (y1 >> 26);
  57. y1 &= 0x3ffffff;
  58. y3 = x3 + (y2 >> 26);
  59. y2 &= 0x3ffffff;
  60. y4 = x4 + (y3 >> 26);
  61. y3 &= 0x3ffffff;
  62. swap = -(y4 >> 26);
  63. y4 &= 0x3ffffff;
  64. y0 ^= x0;
  65. y1 ^= x1;
  66. y2 ^= x2;
  67. y3 ^= x3;
  68. y4 ^= x4;
  69. y0 &= swap;
  70. y1 &= swap;
  71. y2 &= swap;
  72. y3 &= swap;
  73. y4 &= swap;
  74. y0 ^= x0;
  75. y1 ^= x1;
  76. y2 ^= x2;
  77. y3 ^= x3;
  78. y4 ^= x4;
  79. r->v[0] = y0;
  80. r->v[2] = y1;
  81. r->v[4] = y2;
  82. r->v[6] = y3;
  83. r->v[8] = y4;
  84. }
  85. static void fe1305x2_tobytearray(uint8_t *r, fe1305x2 *x) {
  86. uint32_t x0 = x->v[0];
  87. uint32_t x1 = x->v[2];
  88. uint32_t x2 = x->v[4];
  89. uint32_t x3 = x->v[6];
  90. uint32_t x4 = x->v[8];
  91. x1 += x0 >> 26;
  92. x0 &= 0x3ffffff;
  93. x2 += x1 >> 26;
  94. x1 &= 0x3ffffff;
  95. x3 += x2 >> 26;
  96. x2 &= 0x3ffffff;
  97. x4 += x3 >> 26;
  98. x3 &= 0x3ffffff;
  99. *(uint32_t *)r = x0 + (x1 << 26);
  100. *(uint32_t *)(r + 4) = (x1 >> 6) + (x2 << 20);
  101. *(uint32_t *)(r + 8) = (x2 >> 12) + (x3 << 14);
  102. *(uint32_t *)(r + 12) = (x3 >> 18) + (x4 << 8);
  103. }
  /* load32 reads four bytes starting at |t| into a uint32_t in host byte order.
   * It exists to avoid breaking strict aliasing rules in
   * fe1305x2_frombytearray, and it places no alignment requirement on |t|. The
   * pointer is const-qualified because the bytes are only read, so the helper
   * can also be applied to read-only input. */
  static uint32_t load32(const uint8_t *t) {
    uint32_t tmp;
    memcpy(&tmp, t, sizeof(tmp));
    return tmp;
  }
  111. static void fe1305x2_frombytearray(fe1305x2 *r, const uint8_t *x,
  112. unsigned long long xlen) {
  113. int i;
  114. uint8_t t[17];
  115. for (i = 0; (i < 16) && (i < xlen); i++) {
  116. t[i] = x[i];
  117. }
  118. xlen -= i;
  119. x += i;
  120. t[i++] = 1;
  121. for (; i < 17; i++) {
  122. t[i] = 0;
  123. }
  124. r->v[0] = 0x3ffffff & load32(t);
  125. r->v[2] = 0x3ffffff & (load32(t + 3) >> 2);
  126. r->v[4] = 0x3ffffff & (load32(t + 6) >> 4);
  127. r->v[6] = 0x3ffffff & (load32(t + 9) >> 6);
  128. r->v[8] = load32(t + 13);
  129. if (xlen) {
  130. for (i = 0; (i < 16) && (i < xlen); i++) {
  131. t[i] = x[i];
  132. }
  133. t[i++] = 1;
  134. for (; i < 17; i++) {
  135. t[i] = 0;
  136. }
  137. r->v[1] = 0x3ffffff & load32(t);
  138. r->v[3] = 0x3ffffff & (load32(t + 3) >> 2);
  139. r->v[5] = 0x3ffffff & (load32(t + 6) >> 4);
  140. r->v[7] = 0x3ffffff & (load32(t + 9) >> 6);
  141. r->v[9] = load32(t + 13);
  142. } else {
  143. r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0;
  144. }
  145. }
  146. static const fe1305x2 zero __attribute__((aligned(16)));
  147. struct poly1305_state_st {
  148. uint8_t data[sizeof(fe1305x2[5]) + 128];
  149. uint8_t buf[32];
  150. unsigned int buf_used;
  151. uint8_t key[16];
  152. };
  153. void CRYPTO_poly1305_init_neon(poly1305_state *state, const uint8_t key[32]) {
  154. struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
  155. fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
  156. fe1305x2 *const h = r + 1;
  157. fe1305x2 *const c = h + 1;
  158. fe1305x2 *const precomp = c + 1;
  159. unsigned int j;
  160. r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *)key;
  161. r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *)(key + 3)) >> 2);
  162. r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *)(key + 6)) >> 4);
  163. r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *)(key + 9)) >> 6);
  164. r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *)(key + 12)) >> 8);
  165. for (j = 0; j < 10; j++) {
  166. h->v[j] = 0; /* XXX: should fast-forward a bit */
  167. }
  168. addmulmod(precomp, r, r, &zero); /* precompute r^2 */
  169. addmulmod(precomp + 1, precomp, precomp, &zero); /* precompute r^4 */
  170. memcpy(st->key, key + 16, 16);
  171. st->buf_used = 0;
  172. }
  173. void CRYPTO_poly1305_update_neon(poly1305_state *state, const uint8_t *in,
  174. size_t in_len) {
  175. struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
  176. fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
  177. fe1305x2 *const h = r + 1;
  178. fe1305x2 *const c = h + 1;
  179. fe1305x2 *const precomp = c + 1;
  180. unsigned int i;
  181. if (st->buf_used) {
  182. unsigned int todo = 32 - st->buf_used;
  183. if (todo > in_len) {
  184. todo = in_len;
  185. }
  186. for (i = 0; i < todo; i++) {
  187. st->buf[st->buf_used + i] = in[i];
  188. }
  189. st->buf_used += todo;
  190. in_len -= todo;
  191. in += todo;
  192. if (st->buf_used == sizeof(st->buf) && in_len) {
  193. addmulmod(h, h, precomp, &zero);
  194. fe1305x2_frombytearray(c, st->buf, sizeof(st->buf));
  195. for (i = 0; i < 10; i++) {
  196. h->v[i] += c->v[i];
  197. }
  198. st->buf_used = 0;
  199. }
  200. }
  201. while (in_len > 32) {
  202. unsigned int tlen = 1048576;
  203. if (in_len < tlen) {
  204. tlen = in_len;
  205. }
  206. tlen -= blocks(h, precomp, in, tlen);
  207. in_len -= tlen;
  208. in += tlen;
  209. }
  210. if (in_len) {
  211. for (i = 0; i < in_len; i++) {
  212. st->buf[i] = in[i];
  213. }
  214. st->buf_used = in_len;
  215. }
  216. }
  217. void CRYPTO_poly1305_finish_neon(poly1305_state *state, uint8_t mac[16]) {
  218. struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
  219. fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
  220. fe1305x2 *const h = r + 1;
  221. fe1305x2 *const c = h + 1;
  222. fe1305x2 *const precomp = c + 1;
  223. addmulmod(h, h, precomp, &zero);
  224. if (st->buf_used > 16) {
  225. fe1305x2_frombytearray(c, st->buf, st->buf_used);
  226. precomp->v[1] = r->v[1];
  227. precomp->v[3] = r->v[3];
  228. precomp->v[5] = r->v[5];
  229. precomp->v[7] = r->v[7];
  230. precomp->v[9] = r->v[9];
  231. addmulmod(h, h, precomp, c);
  232. } else if (st->buf_used > 0) {
  233. fe1305x2_frombytearray(c, st->buf, st->buf_used);
  234. r->v[1] = 1;
  235. r->v[3] = 0;
  236. r->v[5] = 0;
  237. r->v[7] = 0;
  238. r->v[9] = 0;
  239. addmulmod(h, h, r, c);
  240. }
  241. h->v[0] += h->v[1];
  242. h->v[2] += h->v[3];
  243. h->v[4] += h->v[5];
  244. h->v[6] += h->v[7];
  245. h->v[8] += h->v[9];
  246. freeze(h);
  247. fe1305x2_frombytearray(c, st->key, 16);
  248. c->v[8] ^= (1 << 24);
  249. h->v[0] += c->v[0];
  250. h->v[2] += c->v[2];
  251. h->v[4] += c->v[4];
  252. h->v[6] += c->v[6];
  253. h->v[8] += c->v[8];
  254. fe1305x2_tobytearray(mac, h);
  255. }
  256. #endif /* OPENSSL_ARM && !OPENSSL_NO_ASM */