/* ====================================================================
 * Copyright (c) 2008 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ==================================================================== */
#include <openssl/modes.h>

#include <assert.h>
#include <string.h>  /* memcpy/memset are used below */

#include <openssl/mem.h>
#include <openssl/cpu.h>

#include "internal.h"
#include "../internal.h"
#if !defined(OPENSSL_NO_ASM) && \
    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM))
#define GHASH_ASM
#endif

#if defined(BSWAP4) && STRICT_ALIGNMENT == 1
/* redefine, because alignment is ensured */
#undef GETU32
#define GETU32(p) BSWAP4(*(const uint32_t *)(p))
#undef PUTU32
#define PUTU32(p, v) *(uint32_t *)(p) = BSWAP4(v)
#endif

#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
#define REDUCE1BIT(V)                                                  \
  do {                                                                 \
    if (sizeof(size_t) == 8) {                                         \
      uint64_t T = OPENSSL_U64(0xe100000000000000) & (0 - (V.lo & 1)); \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ T;                                          \
    } else {                                                           \
      uint32_t T = 0xe1000000U & (0 - (uint32_t)(V.lo & 1));           \
      V.lo = (V.hi << 63) | (V.lo >> 1);                               \
      V.hi = (V.hi >> 1) ^ ((uint64_t)T << 32);                        \
    }                                                                  \
  } while (0)
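
/* gcm_init_4bit precomputes Htable[i] = i*H in GF(2^128) for i = 0..15, so
 * that GHASH can consume input one 4-bit nibble per table lookup (the 4-bit
 * table method). REDUCE1BIT performs one shift-and-reduce step modulo the GCM
 * polynomial x^128 + x^7 + x^2 + x + 1; the 0xe1... constant above is that
 * polynomial in GCM's reflected bit order. */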
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
  u128 V;

  Htable[0].hi = 0;
  Htable[0].lo = 0;
  V.hi = H[0];
  V.lo = H[1];

  Htable[8] = V;
  REDUCE1BIT(V);
  Htable[4] = V;
  REDUCE1BIT(V);
  Htable[2] = V;
  REDUCE1BIT(V);
  Htable[1] = V;
  Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
  V = Htable[4];
  Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
  Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
  Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
  V = Htable[8];
  Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
  Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
  Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
  Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
  Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
  Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
  Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;

#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
  /* ARM assembler expects specific dword order in Htable. */
  {
    int j;
    const union {
      long one;
      char little;
    } is_endian = {1};

    if (is_endian.little) {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo;
        Htable[j].lo = V.hi;
      }
    } else {
      for (j = 0; j < 16; ++j) {
        V = Htable[j];
        Htable[j].hi = V.lo << 32 | V.lo >> 32;
        Htable[j].lo = V.hi << 32 | V.hi >> 32;
      }
    }
  }
#endif
}
#if !defined(GHASH_ASM)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};
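
/* gcm_gmult_4bit multiplies the 128-bit accumulator Xi by H in GF(2^128),
 * walking the bytes of Xi from last to first, one nibble at a time, and using
 * rem_4bit to fold the bits shifted off the low end back into the high word
 * (reduction modulo the GCM polynomial). */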
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
  u128 Z;
  int cnt = 15;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  nlo = ((const uint8_t *)Xi)[15];
  nhi = nlo >> 4;
  nlo &= 0xf;

  Z.hi = Htable[nlo].hi;
  Z.lo = Htable[nlo].lo;

  while (1) {
    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nhi].hi;
    Z.lo ^= Htable[nhi].lo;

    if (--cnt < 0) {
      break;
    }

    nlo = ((const uint8_t *)Xi)[cnt];
    nhi = nlo >> 4;
    nlo &= 0xf;

    rem = (size_t)Z.lo & 0xf;
    Z.lo = (Z.hi << 60) | (Z.lo >> 4);
    Z.hi = (Z.hi >> 4);
    if (sizeof(size_t) == 8) {
      Z.hi ^= rem_4bit[rem];
    } else {
      Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
    }

    Z.hi ^= Htable[nlo].hi;
    Z.lo ^= Htable[nlo].lo;
  }

  if (is_endian.little) {
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    uint8_t *p = (uint8_t *)Xi;
    uint32_t v;
    v = (uint32_t)(Z.hi >> 32);
    PUTU32(p, v);
    v = (uint32_t)(Z.hi);
    PUTU32(p + 4, v);
    v = (uint32_t)(Z.lo >> 32);
    PUTU32(p + 8, v);
    v = (uint32_t)(Z.lo);
    PUTU32(p + 12, v);
#endif
  } else {
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
  }
}
/* Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]... */
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
                           const uint8_t *inp, size_t len) {
  u128 Z;
  int cnt;
  size_t rem, nlo, nhi;
  const union {
    long one;
    char little;
  } is_endian = {1};

  do {
    cnt = 15;
    nlo = ((const uint8_t *)Xi)[15];
    nlo ^= inp[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nhi].hi;
      Z.lo ^= Htable[nhi].lo;

      if (--cnt < 0) {
        break;
      }

      nlo = ((const uint8_t *)Xi)[cnt];
      nlo ^= inp[cnt];
      nhi = nlo >> 4;
      nlo &= 0xf;

      rem = (size_t)Z.lo & 0xf;
      Z.lo = (Z.hi << 60) | (Z.lo >> 4);
      Z.hi = (Z.hi >> 4);
      if (sizeof(size_t) == 8) {
        Z.hi ^= rem_4bit[rem];
      } else {
        Z.hi ^= (uint64_t)rem_4bit[rem] << 32;
      }

      Z.hi ^= Htable[nlo].hi;
      Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
#ifdef BSWAP8
      Xi[0] = BSWAP8(Z.hi);
      Xi[1] = BSWAP8(Z.lo);
#else
      uint8_t *p = (uint8_t *)Xi;
      uint32_t v;
      v = (uint32_t)(Z.hi >> 32);
      PUTU32(p, v);
      v = (uint32_t)(Z.hi);
      PUTU32(p + 4, v);
      v = (uint32_t)(Z.lo >> 32);
      PUTU32(p + 8, v);
      v = (uint32_t)(Z.lo);
      PUTU32(p + 12, v);
#endif
    } else {
      Xi[0] = Z.hi;
      Xi[1] = Z.lo;
    }
  } while (inp += 16, len -= 16);
}
#else /* GHASH_ASM */
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif

#define GCM_MUL(ctx, Xi) gcm_gmult_4bit(ctx->Xi.u, ctx->Htable)

#if defined(GHASH_ASM)
#define GHASH(ctx, in, len) gcm_ghash_4bit((ctx)->Xi.u, (ctx)->Htable, in, len)
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
 * trashing effect. In other words idea is to hash data while it's
 * still in L1 cache after encryption pass... */
#define GHASH_CHUNK (3 * 1024)
#endif

#if defined(GHASH_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_OR_64
#define GCM_FUNCREF_4BIT
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                     size_t len);

#if defined(OPENSSL_X86)
#define gcm_init_avx gcm_init_clmul
#define gcm_gmult_avx gcm_gmult_clmul
#define gcm_ghash_avx gcm_ghash_clmul
#else
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                   size_t len);
#endif

#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);

void gcm_gmult_4bit_x86(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(uint64_t Xi[2], const u128 Htable[16],
                        const uint8_t *inp, size_t len);
#endif
#elif defined(OPENSSL_ARM)
#include "../arm_arch.h"
#if __ARM_ARCH__ >= 7
#define GHASH_ASM_ARM
#define GCM_FUNCREF_4BIT
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
                    size_t len);
#endif
#endif
#endif

#ifdef GCM_FUNCREF_4BIT
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)(ctx->Xi.u, ctx->Htable)
#ifdef GHASH
#undef GHASH
#define GHASH(ctx, in, len) (*gcm_ghash_p)(ctx->Xi.u, ctx->Htable, in, len)
#endif
#endif
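
/* Typical call sequence for this API (a sketch only; |aes_key| and
 * |aes_encrypt_block| are placeholders for the caller's block cipher):
 *
 *   GCM128_CONTEXT *ctx = CRYPTO_gcm128_new(&aes_key, aes_encrypt_block);
 *   CRYPTO_gcm128_setiv(ctx, iv, iv_len);
 *   CRYPTO_gcm128_aad(ctx, aad, aad_len);            // optional, AAD first
 *   CRYPTO_gcm128_encrypt(ctx, plaintext, ciphertext, len);
 *   CRYPTO_gcm128_tag(ctx, tag, 16);                 // or _finish to verify
 *   CRYPTO_gcm128_release(ctx);
 */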
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block) {
  GCM128_CONTEXT *ret;

  ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT));
  if (ret != NULL) {
    CRYPTO_gcm128_init(ret, key, block);
  }

  return ret;
}
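
/* CRYPTO_gcm128_init derives the hash key H = E(key, 0^128), converts it to
 * host byte order, and selects the fastest available GHASH implementation
 * (PCLMULQDQ/AVX on x86, NEON on ARMv7+, or the portable 4-bit table code). */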
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block) {
  const union {
    long one;
    char little;
  } is_endian = {1};

  memset(ctx, 0, sizeof(*ctx));
  ctx->block = block;
  ctx->key = key;

  (*block)(ctx->H.c, ctx->H.c, key);

  if (is_endian.little) {
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    uint8_t *p = ctx->H.c;
    uint64_t hi, lo;
    hi = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    lo = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
    ctx->H.u[0] = hi;
    ctx->H.u[1] = lo;
#endif
  }

#if defined(GHASH_ASM_X86_OR_64)
  if (crypto_gcm_clmul_enabled()) {
    if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
      gcm_init_avx(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_avx;
      ctx->ghash = gcm_ghash_avx;
    } else {
      gcm_init_clmul(ctx->Htable, ctx->H.u);
      ctx->gmult = gcm_gmult_clmul;
      ctx->ghash = gcm_ghash_clmul;
    }
    return;
  }
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#if defined(GHASH_ASM_X86) /* x86 only */
  if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
    ctx->gmult = gcm_gmult_4bit_mmx;
    ctx->ghash = gcm_ghash_4bit_mmx;
  } else {
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
#endif
#elif defined(GHASH_ASM_ARM)
  if (CRYPTO_is_NEON_capable()) {
    gcm_init_neon(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_neon;
    ctx->ghash = gcm_ghash_neon;
  } else {
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
  }
#else
  ctx->gmult = gcm_gmult_4bit;
  ctx->ghash = gcm_ghash_4bit;
  gcm_init_4bit(ctx->Htable, ctx->H.u);
#endif
}
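
/* CRYPTO_gcm128_setiv resets the per-message state and derives the initial
 * counter block Y0. A 96-bit IV is used directly with the counter set to 1;
 * any other length is ghashed together with its bit length to produce Y0.
 * EK0 = E(key, Y0) is saved for the final tag computation. */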
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const uint8_t *iv, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  ctx->Yi.u[0] = 0;
  ctx->Yi.u[1] = 0;
  ctx->Xi.u[0] = 0;
  ctx->Xi.u[1] = 0;
  ctx->len.u[0] = 0; /* AAD length */
  ctx->len.u[1] = 0; /* message length */
  ctx->ares = 0;
  ctx->mres = 0;

  if (len == 12) {
    memcpy(ctx->Yi.c, iv, 12);
    ctx->Yi.c[15] = 1;
    ctr = 1;
  } else {
    size_t i;
    uint64_t len0 = len;

    while (len >= 16) {
      for (i = 0; i < 16; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
      iv += 16;
      len -= 16;
    }
    if (len) {
      for (i = 0; i < len; ++i) {
        ctx->Yi.c[i] ^= iv[i];
      }
      GCM_MUL(ctx, Yi);
    }
    len0 <<= 3;
    if (is_endian.little) {
#ifdef BSWAP8
      ctx->Yi.u[1] ^= BSWAP8(len0);
#else
      ctx->Yi.c[8] ^= (uint8_t)(len0 >> 56);
      ctx->Yi.c[9] ^= (uint8_t)(len0 >> 48);
      ctx->Yi.c[10] ^= (uint8_t)(len0 >> 40);
      ctx->Yi.c[11] ^= (uint8_t)(len0 >> 32);
      ctx->Yi.c[12] ^= (uint8_t)(len0 >> 24);
      ctx->Yi.c[13] ^= (uint8_t)(len0 >> 16);
      ctx->Yi.c[14] ^= (uint8_t)(len0 >> 8);
      ctx->Yi.c[15] ^= (uint8_t)(len0);
#endif
    } else {
      ctx->Yi.u[1] ^= len0;
    }

    GCM_MUL(ctx, Yi);

    if (is_endian.little) {
      ctr = GETU32(ctx->Yi.c + 12);
    } else {
      ctr = ctx->Yi.d[3];
    }
  }

  (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
  ++ctr;
  if (is_endian.little) {
    PUTU32(ctx->Yi.c + 12, ctr);
  } else {
    ctx->Yi.d[3] = ctr;
  }
}
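
/* CRYPTO_gcm128_aad feeds additional authenticated data into the GHASH
 * accumulator Xi. It must be called after setiv and before any encrypt or
 * decrypt call for the same message; it returns 0 once payload data has been
 * processed or if the total AAD length would exceed 2^61 bytes. */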
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
  size_t i;
  unsigned int n;
  uint64_t alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  if (ctx->len.u[1]) {
    return 0;
  }

  alen += len;
  if (alen > (OPENSSL_U64(1) << 61) || (sizeof(len) == 8 && alen < len)) {
    return 0;
  }
  ctx->len.u[0] = alen;

  n = ctx->ares;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(aad++);
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->ares = n;
      return 1;
    }
  }

#ifdef GHASH
  if ((i = (len & (size_t) - 16))) {
    GHASH(ctx, aad, i);
    aad += i;
    len -= i;
  }
#else
  while (len >= 16) {
    for (i = 0; i < 16; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
    GCM_MUL(ctx, Xi);
    aad += 16;
    len -= 16;
  }
#endif

  if (len) {
    n = (unsigned int)len;
    for (i = 0; i < len; ++i) {
      ctx->Xi.c[i] ^= aad[i];
    }
  }

  ctx->ares = n;
  return 1;
}
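
/* CRYPTO_gcm128_encrypt CTR-encrypts |len| bytes from |in| to |out| with the
 * block function and absorbs the resulting ciphertext into the GHASH
 * accumulator. The total message length is capped at 2^36 - 32 bytes, the GCM
 * limit for a 32-bit counter. */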
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) - 16))) {
    size_t j = i;

    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
    GHASH(ctx, out - j, j);
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
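
/* CRYPTO_gcm128_decrypt mirrors CRYPTO_gcm128_encrypt, except that the
 * ciphertext (the input) is absorbed into GHASH before it is XORed with the
 * key stream, so the authenticator covers the same data as on encryption. */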
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const unsigned char *in,
                          unsigned char *out, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  block128_f block = ctx->block;
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

  if (STRICT_ALIGNMENT && ((size_t)in | (size_t)out) % sizeof(size_t) != 0) {
    for (i = 0; i < len; ++i) {
      uint8_t c;
      if (n == 0) {
        (*block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little) {
          PUTU32(ctx->Yi.c + 12, ctr);
        } else {
          ctx->Yi.d[3] = ctr;
        }
      }
      c = in[i];
      out[i] = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      n = (n + 1) % 16;
      if (n == 0) {
        GCM_MUL(ctx, Xi);
      }
    }

    ctx->mres = n;
    return 1;
  }

#if defined(GHASH) && defined(GHASH_CHUNK)
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

    GHASH(ctx, in, GHASH_CHUNK);
    while (j) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      j -= 16;
    }
    len -= GHASH_CHUNK;
  }
  if ((i = (len & (size_t) - 16))) {
    GHASH(ctx, in, i);
    while (len >= 16) {
      size_t *out_t = (size_t *)out;
      const size_t *in_t = (const size_t *)in;

      (*block)(ctx->Yi.c, ctx->EKi.c, key);
      ++ctr;
      if (is_endian.little) {
        PUTU32(ctx->Yi.c + 12, ctr);
      } else {
        ctx->Yi.d[3] = ctr;
      }
      for (i = 0; i < 16 / sizeof(size_t); ++i) {
        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
      }
      out += 16;
      in += 16;
      len -= 16;
    }
  }
#else
  while (len >= 16) {
    size_t *out_t = (size_t *)out;
    const size_t *in_t = (const size_t *)in;

    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    for (i = 0; i < 16 / sizeof(size_t); ++i) {
      size_t c = in_t[i];
      out_t[i] = c ^ ctx->EKi.t[i];
      ctx->Xi.t[i] ^= c;
    }
    GCM_MUL(ctx, Xi);
    out += 16;
    in += 16;
    len -= 16;
  }
#endif
  if (len) {
    (*block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
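
/* The *_ctr32 variants below take a ctr128_f |stream| callback that produces
 * many counter blocks per call (for example a hardware-accelerated CTR
 * routine), so the bulk of the data is processed in GHASH_CHUNK-sized
 * stretches rather than one 16-byte block at a time. */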
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to encrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    GHASH(ctx, out, GHASH_CHUNK);
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) - 16))) {
    size_t j = i / 16;

    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    in += i;
    len -= i;
#if defined(GHASH)
    GHASH(ctx, out, i);
    out += i;
#else
    while (j--) {
      for (i = 0; i < 16; ++i) {
        ctx->Xi.c[i] ^= out[i];
      }
      GCM_MUL(ctx, Xi);
      out += 16;
    }
#endif
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const uint8_t *in,
                                uint8_t *out, size_t len, ctr128_f stream) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  unsigned int n, ctr;
  size_t i;
  uint64_t mlen = ctx->len.u[1];
  void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#ifdef GHASH
  void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16],
                      const uint8_t *inp, size_t len) = ctx->ghash;
#endif
#endif

  mlen += len;
  if (mlen > ((OPENSSL_U64(1) << 36) - 32) ||
      (sizeof(len) == 8 && mlen < len)) {
    return 0;
  }
  ctx->len.u[1] = mlen;

  if (ctx->ares) {
    /* First call to decrypt finalizes GHASH(AAD) */
    GCM_MUL(ctx, Xi);
    ctx->ares = 0;
  }

  if (is_endian.little) {
    ctr = GETU32(ctx->Yi.c + 12);
  } else {
    ctr = ctx->Yi.d[3];
  }

  n = ctx->mres;
  if (n) {
    while (n && len) {
      uint8_t c = *(in++);
      *(out++) = c ^ ctx->EKi.c[n];
      ctx->Xi.c[n] ^= c;
      --len;
      n = (n + 1) % 16;
    }
    if (n == 0) {
      GCM_MUL(ctx, Xi);
    } else {
      ctx->mres = n;
      return 1;
    }
  }

#if defined(GHASH)
  while (len >= GHASH_CHUNK) {
    GHASH(ctx, in, GHASH_CHUNK);
    (*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
    ctr += GHASH_CHUNK / 16;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += GHASH_CHUNK;
    in += GHASH_CHUNK;
    len -= GHASH_CHUNK;
  }
#endif
  if ((i = (len & (size_t) - 16))) {
    size_t j = i / 16;

#if defined(GHASH)
    GHASH(ctx, in, i);
#else
    while (j--) {
      size_t k;
      for (k = 0; k < 16; ++k) {
        ctx->Xi.c[k] ^= in[k];
      }
      GCM_MUL(ctx, Xi);
      in += 16;
    }
    j = i / 16;
    in -= i;
#endif
    (*stream)(in, out, j, key, ctx->Yi.c);
    ctr += (unsigned int)j;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    out += i;
    in += i;
    len -= i;
  }
  if (len) {
    (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
    ++ctr;
    if (is_endian.little) {
      PUTU32(ctx->Yi.c + 12, ctr);
    } else {
      ctx->Yi.d[3] = ctr;
    }
    while (len--) {
      uint8_t c = in[n];
      ctx->Xi.c[n] ^= c;
      out[n] = c ^ ctx->EKi.c[n];
      ++n;
    }
  }

  ctx->mres = n;
  return 1;
}
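
/* CRYPTO_gcm128_finish absorbs the AAD and ciphertext bit lengths into GHASH,
 * XORs the result with EK0 to form the tag, and compares it against |tag| in
 * constant time. CRYPTO_gcm128_tag runs the same finalization but copies the
 * computed tag out instead of verifying one. */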
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
  const union {
    long one;
    char little;
  } is_endian = {1};
  uint64_t alen = ctx->len.u[0] << 3;
  uint64_t clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
  void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

  if (ctx->mres || ctx->ares) {
    GCM_MUL(ctx, Xi);
  }

  if (is_endian.little) {
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    uint8_t *p = ctx->len.c;

    ctx->len.u[0] = alen;
    ctx->len.u[1] = clen;

    alen = (uint64_t)GETU32(p) << 32 | GETU32(p + 4);
    clen = (uint64_t)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
  }

  ctx->Xi.u[0] ^= alen;
  ctx->Xi.u[1] ^= clen;
  GCM_MUL(ctx, Xi);

  ctx->Xi.u[0] ^= ctx->EK0.u[0];
  ctx->Xi.u[1] ^= ctx->EK0.u[1];

  if (tag && len <= sizeof(ctx->Xi)) {
    return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
  } else {
    return 0;
  }
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
  CRYPTO_gcm128_finish(ctx, NULL, 0);
  memcpy(tag, ctx->Xi.c, len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx) {
  if (ctx) {
    OPENSSL_cleanse(ctx, sizeof(*ctx));
    OPENSSL_free(ctx);
  }
}
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#ifdef GHASH_ASM
  return OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
         OPENSSL_ia32cap_P[1] & (1 << 1);    /* check PCLMULQDQ bit */
#else
  return 0;
#endif
}
#endif